Example #1
def backupFile(filename, backup=None, backup_ext='.BAK', **kwargs):
    """Rename *filename* with *backup_ext* appended to its name for backup
    purposes, if *backup* is **True** or if automatic backups is turned on
    using :func:`.confProDy`.  Default extension :file:`.BAK` is used when
    one is not set using :func:`.confProDy`.  If *filename* does not exist,
    no action will be taken and *filename* will be returned.  If file is
    successfully renamed, new filename will be returned."""

    try:
        exists = isfile(filename)
    except Exception as err:
        raise TypeError('filename must be a string ({0})'.format(str(err)))

    from prody import SETTINGS
    if exists and (backup or SETTINGS.get('backup', False)):
        if backup_ext == '.BAK':
            backup_ext = SETTINGS.get('backup_ext', '.BAK')
        bak = filename + backup_ext
        if isfile(bak):
            try:
                os.remove(bak)
            except Exception:
                pass  # a stale backup that cannot be removed is left in place
        try:
            os.rename(filename, bak)
        except Exception:
            pass  # if renaming fails, the original file is left untouched
        return bak
    else:
        return filename
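
A minimal usage sketch (assuming ProDy is installed and that backupFile is
importable from prody.utilities; the file name is a placeholder):

    import os
    from prody.utilities import backupFile

    with open('scores.txt', 'w') as out:   # throwaway file for the demo
        out.write('data\n')
    bak = backupFile('scores.txt', backup=True)   # -> 'scores.txt.BAK'
    print(os.path.isfile(bak))                    # True: file was renamed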
Example #2
def wwPDBServer(*key):
    """Set/get `wwPDB`_ FTP/HTTP server location used for downloading PDB
    structures.  Use one of the following keywords for setting a server:

    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+

    .. _wwPDB: http://www.wwpdb.org/"""

    if not key:
        return SETTINGS.get('wwpdb', None)
    elif len(key) == 1:
        try:
            key = key[0].lower()
        except AttributeError:
            raise TypeError('key must be a string')
        if key in WWPDB_FTP_SERVERS:
            SETTINGS['wwpdb'] = key
            SETTINGS.save()
            LOGGER.info('wwPDB server is set to {}.'
                        .format(WWPDB_FTP_SERVERS[key][0]))
        else:
            raise ValueError('{0} is not a valid wwPDB server identifier'
                             .format(repr(key)))
    else:
        raise TypeError('one wwPDB server identifier is expected, {0} given'
                        .format(len(key)))
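
A short usage sketch (assuming wwPDBServer is importable from prody):

    from prody import wwPDBServer

    wwPDBServer('PDBe')    # keys are case insensitive: 'pdbe' works too
    print(wwPDBServer())   # -> 'pdbe', the stored lowercase key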
Example #3
def pathVMD(*path):
    """Returns VMD path, or set it to be a user specified *path*."""

    if not path:
        path = SETTINGS.get('vmd', None)
        if isExecutable(path):
            return path
        else:
            LOGGER.warning('VMD path is not set by user, looking for it.')

            vmdbin = None
            vmddir = None
            if PLATFORM == 'Windows':
                if PY3K:
                    import winreg
                else:
                    import _winreg as winreg  # PY3K: OK
                for vmdversion in ('1.8.7', '1.9', '1.9.1'):
                    try:
                        key = winreg.OpenKey(
                            winreg.HKEY_LOCAL_MACHINE,
                            'Software\\University of Illinois\\VMD\\' +
                            vmdversion)
                        vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                        vmdbin = join(vmddir, 'vmd.exe')
                    except:
                        pass
                    try:
                        key = winreg.OpenKey(
                            winreg.HKEY_LOCAL_MACHINE,
                            'Software\\WOW6432node\\University of Illinois\\VMD\\'
                            + vmdversion)
                        vmddir = winreg.QueryValueEx(key, 'VMDDIR')[0]
                        vmdbin = join(vmddir, 'vmd.exe')
                    except:
                        pass
            else:
                vmdbin = which('vmd')
            if isExecutable(vmdbin):
                setVMDpath(vmdbin)
                return vmdbin
    elif len(path) == 1:
        path = path[0]
        if isExecutable(path):
            SETTINGS['vmd'] = path
            SETTINGS.save()
            LOGGER.info("VMD path is set to '{0}'.".format(path))
        else:
            raise OSError('{0} is not executable.'.format(str(path)))
    else:
        raise ValueError('specify a single path string')
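
Usage sketch for the get/set behavior (the executable path is a placeholder):

    from prody import pathVMD

    pathVMD('/usr/local/bin/vmd')   # validated with isExecutable, then saved
    print(pathVMD())                # -> '/usr/local/bin/vmd'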
Example #4
def updateDefinitions():
    """Update definitions and set some global variables.  This function must be
    called at the end of the module."""

    global DEFINITIONS, AMINOACIDS, BACKBONE, TIMESTAMP
    DEFINITIONS = {}
    user = SETTINGS.get('flag_definitions', {})
    
    # nucleics
    nucleic = set()
    for key in ['nucleobase', 'nucleoside', 'nucleotide']:
        aset = set(user.get(key, DEFAULTS[key]))
        nucleic.update(aset)
        DEFINITIONS[key] = aset
    DEFINITIONS['nucleic'] = nucleic
    
    # heteros
    for key in ['water', 'lipid', 'ion', 'sugar', 'heme', 
                 'at', 'cg', 'purine', 'pyrimidine',]:
        DEFINITIONS[key] = set(user.get(key, DEFAULTS[key]))
        
    # look up 'bb'/'bbfull' explicitly; reusing the loop variable *key*
    # here would silently pick up the last hetero key instead
    DEFINITIONS['backbone'] = DEFINITIONS['bb'] = set(
        user.get('bb', DEFAULTS['bb']))
    DEFINITIONS['backbonefull'] = DEFINITIONS['bbfull'] = set(
        user.get('bbfull', DEFAULTS['bbfull']))

    # element regex
    for key in ['hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur']:
        DEFINITIONS[key] = recompile(user.get(key, DEFAULTS[key]))

    try:
        nonstd = SETTINGS[NONSTANDARD_KEY]
    except KeyError:
        nonstd = NONSTANDARD
        DEFINITIONS.update(CATEGORIZED)
    else:

        for cat in CATEGORIES:
            for key in CATEGORIES[cat]:
                DEFINITIONS[key] = set(DEFAULTS[key])

        DEFINITIONS['charged'] = set(DEFINITIONS['acidic'])
        DEFINITIONS['charged'].update(DEFINITIONS['basic'])

        for resi, props in nonstd.items():
            for prop in props: 
                DEFINITIONS[prop].add(resi)

    DEFINITIONS['stdaa'] = DEFAULTS['stdaa']
    DEFINITIONS['nonstdaa'] = set(nonstd)
    AMINOACIDS = set(DEFINITIONS['stdaa'])
    AMINOACIDS.update(DEFINITIONS['nonstdaa'])
    DEFINITIONS['protein'] = DEFINITIONS['aminoacid'] = AMINOACIDS
    
    BACKBONE = DEFINITIONS['bb']

    TIMESTAMP = SETTINGS.get('flag_timestamp', 0)
Example #5
def changeDefinitions(**kwargs):
    """Store user-supplied flag definitions in *SETTINGS*, stamp the change,
    and rebuild the definitions via :func:`updateDefinitions`."""

    defs = SETTINGS.get(DEFINITIONS_KEY, {})
    defs.update(kwargs)
    SETTINGS[DEFINITIONS_KEY] = defs
    SETTINGS[TIMESTAMP_KEY] = int(time())
    SETTINGS.save()
    updateDefinitions()
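
A hedged usage sketch; the keyword names mirror the flag keys handled by
updateDefinitions (e.g. water):

    # persist a custom definition of the 'water' flag and rebuild DEFINITIONS
    changeDefinitions(water={'HOH', 'WAT', 'TIP3'})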
Example #6
def setVMDpath(path):
    """Set path to a VMD executable."""

    if isExecutable(path):
        SETTINGS["vmd"] = path
        SETTINGS.save()
        LOGGER.info("VMD path is set to '{0:s}'.".format(path))
    else:
        raise OSError("{0:s} is not executable.".format(str(path)))
Example #7
def getPDBLocalFolder():
    """Return the path to a local PDB folder and folder structure specifier. 
    If a local folder is not set, **None** will be returned."""

    folder = SETTINGS.get('pdb_local_folder')
    if folder:
        if isdir(folder):
            return folder, SETTINGS.get('pdb_local_divided', True)
        else:
            LOGGER.warning('PDB local folder {0:s} is not accessible.'
                           .format(repr(folder)))
Example #8
def setPDBMirrorPath(path):
    """Set the path to a local PDB mirror."""
    
    if not isinstance(path, str):
        raise TypeError('path must be a string')
    if isdir(path):
        path = abspath(path)
        LOGGER.info('Local PDB mirror path is set: {0:s}'.format(repr(path)))
        SETTINGS['pdb_mirror_path'] = path
        SETTINGS.save()
    else:
        raise IOError('No such directory: {0:s}'.format(repr(path)))
Example #9
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:
     
      * {props}
    
    >>> addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large', 
    ... 'polar', 'surface')
    
    Default set of non-standard amino acids can be restored as follows:
    
    >>> flagDefinition(reset='nonstdaa')"""
    
    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0:s} is unusually long.'
                    .format(repr(resname)))
    propset = set(properties)
    for cat, val in CATEGORIES.items():
        intersection = val.intersection(propset)
        if intersection:
            if len(intersection) > 1:
                raise ValueError('amino acid properties {0:s} cannot be '
                                   'present together'
                      .format(', '.join([repr(prp) for prp in intersection])))
            for prop in intersection:
                propset.remove(prop)
    if propset:
        raise ValueError('amino acid property {0:s} is not valid'
                           .format(repr(propset.pop())))
        
    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)    
Example #10
 def savePickle(self, folder=None, filename=None):
     if folder is None:
         # define folder where to look for pickles
         folder = SETTINGS.get('rhapsody_local_folder', '.')
     if filename is None:
         # use the default filename, if possible
         if self.PDBID is None:
             # when a custom structure is used, there is no
             # default filename: the user should provide it
             raise ValueError('Please provide a filename.')
         filename = 'PDBfeatures-' + self.PDBID + '.pkl'
     pickle_path = os.path.join(folder, filename)
     # do not store GNM and ANM instances.
     # If a valid PDBID is present, do not store parsed PDB
     # as well, since it can be easily fetched again
     cache = (self._pdb, self._gnm, self._anm)
     if self.PDBID is not None:
         self._pdb = None
     self._gnm = {}
     self._anm = {}
     for env in ['chain', 'reduced', 'sliced']:
         self._gnm[env] = {chID: None for chID in self.chids}
         self._anm[env] = {chID: None for chID in self.chids}
     # write pickle
     with open(pickle_path, 'wb') as f:
         pickle.dump(self, f)
     # restore temporarily cached data
     self._pdb, self._gnm, self._anm = cache
     LOGGER.info("Pickle '{}' saved.".format(filename))
     return pickle_path
Example #11
def pathPDBMirror(path=None, format=None):
    """Returns or specify PDB mirror path to be used by :func:`.fetchPDB`.
    To release the current mirror, pass an invalid path, e.g. ``path=''``.
    If you are keeping a partial mirror, such as PDB files in
    :file:`/data/structures/divided/pdb/` folder, specify *format*, which is
    ``'pdb'`` in this case."""

    if path is None:
        path = SETTINGS.get('pdb_mirror_path')
        format = SETTINGS.get('pdb_mirror_format', None)
        if path:
            if isdir(path):
                if format is None:
                    return path
                else:
                    return path, format
            else:
                LOGGER.warning(
                    'PDB mirror path {0} is not accessible.'.format(
                        repr(path)))
    else:
        if isdir(path):
            path = abspath(path)
            LOGGER.info('Local PDB mirror path is set: {0}'.format(repr(path)))
            SETTINGS['pdb_mirror_path'] = path
            SETTINGS['pdb_mirror_format'] = format
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_mirror_path')
            if current:
                LOGGER.info('PDB mirror {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(path)))
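
Usage sketch covering the three behaviors (set, get, release); the mirror
directory is a placeholder:

    from prody import pathPDBMirror

    pathPDBMirror('/data/pdb')   # set: validated with isdir, then saved
    print(pathPDBMirror())       # get: -> '/data/pdb'
    pathPDBMirror('')            # release: an invalid path drops the mirror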
Example #12
def getVMDpath():
    """Return VMD path set by user or one identified automatically."""

    path = SETTINGS.get("vmd", None)
    if isExecutable(path):
        return path
    else:
        LOGGER.warning("VMD path is not set by user, looking for it.")

        vmdbin = None
        vmddir = None
        if PLATFORM == "Windows":
            # Python 3 renamed _winreg to winreg (see the PY3K guard in Example #3)
            if PY3K:
                import winreg as _winreg
            else:
                import _winreg

            for vmdversion in ("1.8.7", "1.9", "1.9.1"):
                try:
                    key = _winreg.OpenKey(
                        _winreg.HKEY_LOCAL_MACHINE, "Software\\University of Illinois\\VMD\\" + vmdversion
                    )
                    vmddir = _winreg.QueryValueEx(key, "VMDDIR")[0]
                    vmdbin = os.path.join(vmddir, "vmd.exe")
                except:
                    pass
                try:
                    key = _winreg.OpenKey(
                        _winreg.HKEY_LOCAL_MACHINE, "Software\\WOW6432node\\University of Illinois\\VMD\\" + vmdversion
                    )
                    vmddir = _winreg.QueryValueEx(key, "VMDDIR")[0]
                    vmdbin = os.path.join(vmddir, "vmd.exe")
                except:
                    pass
        else:
            vmdbin = which("vmd")
        if isExecutable(vmdbin):
            setVMDpath(vmdbin)
            return vmdbin
Example #13
def getPDBMirrorPath():
    """Return the path to a local PDB mirror, or **None** if a mirror path is 
    not set."""

    path = SETTINGS.get('pdb_mirror_path')
    if path:
        if isdir(path):
            return path
        else:
            LOGGER.warning('PDB mirror path {0:s} is not accessible.'
                           .format(repr(path)))
Example #14
def pathPDBFolder(folder=None, divided=False):
    """Returns or specify local PDB folder for storing PDB files downloaded from
    `wwPDB <http://www.wwpdb.org/>`_ servers.  Files stored in this folder can
    be accessed via :func:`.fetchPDB` from any working directory.  To release
    the current folder, pass an invalid path, e.g. ``folder=''``.

    If *divided* is **True**, the divided folder structure of wwPDB servers
    will be assumed when reading from and writing to the local folder.  For
    example, a structure with identifier **1XYZ** will be present as
    :file:`pdblocalfolder/yz/pdb1xyz.pdb.gz`.

    If *divided* is **False**, a plain folder structure will be expected and
    adopted when saving files.  For example, the same structure will be
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.

    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""

    if folder is None:
        folder = SETTINGS.get('pdb_local_folder')
        if folder:
            if isdir(folder):
                return folder, SETTINGS.get('pdb_local_divided', True)
            else:
                LOGGER.warn('PDB local folder {0} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local PDB folder is set: {0}'.format(repr(folder)))
            if divided:
                LOGGER.info('wwPDB divided folder structure will be assumed.')
            else:
                LOGGER.info('A plain folder structure will be assumed.')
            SETTINGS['pdb_local_folder'] = folder
            SETTINGS['pdb_local_divided'] = bool(divided)
            SETTINGS.save()
        else:
            current = SETTINGS.pop('pdb_local_folder')
            if current:
                LOGGER.info('PDB folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.pop('pdb_local_divided')
                SETTINGS.save()
            else:
                raise IOError('{0} is not a valid path.'.format(repr(folder)))
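
Usage sketch, analogous to the mirror example above (the folder is a
placeholder and must exist):

    from prody import pathPDBFolder

    pathPDBFolder('/home/user/pdb')    # plain layout: /home/user/pdb/1xyz.pdb.gz
    folder, divided = pathPDBFolder()  # -> ('/home/user/pdb', False)
    pathPDBFolder('')                  # release the folder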
Example #15
def pathEVmutationFolder(folder=None):
    """Returns or sets path of local folder where EVmutation data are stored.
    To release the current folder, pass an invalid path, e.g.
    ``folder=''``.
    """
    if folder is None:
        folder = SETTINGS.get('EVmutation_local_folder')
        if folder:
            if isdir(folder):
                return folder
            else:
                LOGGER.warn('Local folder {} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local EVmutation folder is set: {}'.format(
                repr(folder)))
            SETTINGS['EVmutation_local_folder'] = folder
            SETTINGS.save()
        else:
            current = SETTINGS.pop('EVmutation_local_folder')
            if current:
                LOGGER.info('EVmutation folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{} is not a valid path.'.format(repr(folder)))
Example #16
def pathRhapsodyFolder(folder=None):
    """Returns or sets path of local folder where files and pickles necessary
    to run Rhapsody will be stored. To release the current folder, pass an
    invalid path, e.g. ``folder=''``.
    """
    if folder is None:
        folder = SETTINGS.get('rhapsody_local_folder')
        if folder:
            if isdir(folder):
                return folder
            else:
                LOGGER.warn('Local folder {} is not accessible.'.format(
                    repr(folder)))
    else:
        if isdir(folder):
            folder = abspath(folder)
            LOGGER.info('Local Rhapsody folder is set: {}'.format(
                repr(folder)))
            SETTINGS['rhapsody_local_folder'] = folder
            SETTINGS.save()
        else:
            current = SETTINGS.pop('rhapsody_local_folder')
            if current:
                LOGGER.info('Rhapsody folder {0} is released.'.format(
                    repr(current)))
                SETTINGS.save()
            else:
                raise IOError('{} is not a valid path.'.format(repr(folder)))
Example #17
 def recoverPickle(self, filename=None, folder=None, days=30, **kwargs):
     acc = self.uniq_acc
     if acc is None:
         # assume acc is equal to uniq_acc
         acc = self.acc
     if folder is None:
         folder = SETTINGS.get('rhapsody_local_folder')
         if folder is None:
             folder = '.'
         else:
             folder = os.path.join(folder, 'pickles')
     if filename is None:
         # assume acc is equal to uniq_acc
         acc = self.acc
         filename = 'UniprotMap-' + acc + '.pkl'
         pickle_path = os.path.join(folder, filename)
         if not os.path.isfile(pickle_path):
             # import unique accession number
             acc = pd.queryUniprot(self.acc)['accession   0']
             filename = 'UniprotMap-' + acc + '.pkl'
             pickle_path = os.path.join(folder, filename)
     else:
         pickle_path = os.path.join(folder, filename)
     # check if pickle exists
     if not os.path.isfile(pickle_path):
         raise IOError("File '{}' not found".format(filename))
     # load pickle
     with open(pickle_path, 'rb') as f:
         recovered_self = pickle.load(f)
     if acc not in [recovered_self.acc, recovered_self.uniq_acc]:
         raise ValueError('Accession number in recovered pickle (%s) ' %
                          recovered_self.uniq_acc + 'does not match.')
     # check timestamp and ignore pickles that are too old
     date_format = "%Y-%m-%d %H:%M:%S.%f"
     t_old = datetime.datetime.strptime(recovered_self.timestamp,
                                        date_format)
     t_now = datetime.datetime.utcnow()
     Delta_t = datetime.timedelta(days=days)
     if t_old + Delta_t < t_now:
         raise RuntimeError(
             'Pickle {} was too old and was ignored.'.format(filename))
     self.fullRecord = recovered_self.fullRecord
     self.uniq_acc = recovered_self.uniq_acc
     self.sequence = recovered_self.sequence
     self.PDBrecords = recovered_self.PDBrecords
     self.PDBmappings = recovered_self.PDBmappings
     self.customPDBmappings = recovered_self.customPDBmappings
     self._align_algo_args = recovered_self._align_algo_args
     self._align_algo_kwargs = recovered_self._align_algo_kwargs
     self.timestamp = recovered_self.timestamp
     self.Pfam = recovered_self.Pfam
     LOGGER.info("Pickle '{}' recovered.".format(filename))
     return
Example #18
def setWWPDBFTPServer(key):
    """Set the `wwPDB <http://www.wwpdb.org/>`_ FTP server used for downloading
    PDB structures when needed.  Use one of the following keywords for setting 
    a different server.
    
    +---------------------------+-----------------------------+
    | wwPDB FTP server          | *Key* (case insensitive)    |
    +===========================+=============================+
    | RCSB PDB (USA) (default)  | RCSB, USA, US               |
    +---------------------------+-----------------------------+
    | PDBe (Europe)             | PDBe, Europe, Euro, EU      |
    +---------------------------+-----------------------------+
    | PDBj (Japan)              | PDBj, Japan, Jp             |
    +---------------------------+-----------------------------+
    """
    
    server = WWPDB_FTP_SERVERS.get(key.lower())
    if server is not None:
        SETTINGS['wwpdb_ftp'] = server
        SETTINGS.save()
    else:
        LOGGER.warning('{0:s} is not a valid key.'.format(key))
Example #19
def getNonstdProperties(resname):
    """Return properties of non-standard amino acid *resname*.
    
    >>> getNonstdProperties('PTR')
    ['acidic', 'aromatic', 'cyclic', 'large', 'polar', 'surface']"""
    
    try:
        alist = list(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)[resname])
    except KeyError:
        raise ValueError('{0:s} is not a non-standard residue name'
                           .format(repr(resname)))
    else:
        alist.sort()
        return alist
Example #20
def setPDBLocalFolder(folder, divided=False):
    """Set a local PDB folder.  Setting a local PDB folder will make 
    :func:`fetchPDB` function to seek that folder for presence of requested
    PDB files.  Also, files downloaded from `wwPDB <http://www.wwpdb.org/>`_ 
    FTP servers will be saved in this folder.  This may help users to store 
    PDB files in a single place and have access to them in different working 
    directories.
    
    If *divided* is **True**, the divided folder structure of wwPDB servers 
    will be assumed when reading from and writing to the local folder.  For 
    example, a structure with identifier **1XYZ** will be present as 
    :file:`pdblocalfolder/yz/pdb1xyz.pdb.gz`. 
    
    If *divided* is **False**, a plain folder structure will be expected and 
    adopted when saving files.  For example, the same structure will be 
    present as :file:`pdblocalfolder/1xyz.pdb.gz`.
    
    Finally, in either case, lower case letters will be used and compressed
    files will be stored."""
    
    if not isinstance(folder, str):
        raise TypeError('folder must be a string')
    assert isinstance(divided, bool), 'divided must be a boolean'
    if isdir(folder):
        folder = abspath(folder)
        LOGGER.info('Local PDB folder is set: {0:s}'.format(repr(folder)))
        if divided:
            LOGGER.info('When using local PDB folder, wwPDB divided '
                        'folder structure will be assumed.')
        else:
            LOGGER.info('When using local PDB folder, a plain folder structure '
                        'will be assumed.')
        SETTINGS['pdb_local_folder'] = folder
        SETTINGS['pdb_local_divided'] = divided
        SETTINGS.save()
    else:
        raise IOError('No such directory: {0:s}'.format(repr(folder)))
Example #21
def listNonstdAAProps(resname):
    """Returns properties of non-standard amino acid *resname*.

    .. ipython:: python

       listNonstdAAProps('PTR')"""

    try:
        alist = list(SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)[resname])
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'.format(
            repr(resname)))
    else:
        alist.sort()
        return alist
Example #22
 def savePickle(self, filename=None, folder=None, store_custom_PDBs=False):
     if folder is None:
         folder = SETTINGS.get('rhapsody_local_folder', '.')
     if filename is None:
         filename = 'UniprotMap-' + self.uniq_acc + '.pkl'
     pickle_path = os.path.join(folder, filename)
     cache = self.customPDBmappings
     if store_custom_PDBs is not True:
         # do not store alignments of custom PDBs
         self.customPDBmappings = []
     # save pickle
     with open(pickle_path, 'wb') as f:
         pickle.dump(self, f)
     self.customPDBmappings = cache
     LOGGER.info("Pickle '{}' saved.".format(filename))
     return pickle_path
Example #23
    def savePickle(self, folder=None, filename=None):
        """Stores a pickle of the current class instance. The pickle will
        contain all information and precomputed features, but not GNM and ANM
        models. In case a PDBID is missing, the parsed PDB :class:`AtomGroup`
        is stored as well.

        :arg folder: path of the folder where the pickle will be saved. If not
            specified, the local Rhapsody installation folder will be used.
        :type folder: str
        :arg filename: name of the pickle. By default, the pickle will be
            saved as ``'PDBfeatures-[PDBID].pkl'``. If a PDBID is not defined,
            the user must provide a filename.
        :type filename: str
        :return: pickle path
        :rtype: str
        """
        if folder is None:
            # define folder where to look for pickles
            folder = SETTINGS.get('rhapsody_local_folder')
            if folder is None:
                folder = '.'
            else:
                folder = os.path.join(folder, 'pickles')
        if filename is None:
            # use the default filename, if possible
            if self.PDBID is None:
                # when a custom structure is used, there is no
                # default filename: the user should provide it
                raise ValueError('Please provide a filename.')
            filename = 'PDBfeatures-' + self.PDBID + '.pkl'
        pickle_path = os.path.join(folder, filename)
        # do not store GNM and ANM instances.
        # If a valid PDBID is present, do not store parsed PDB
        # as well, since it can be easily fetched again
        cache = (self._pdb, self._gnm, self._anm)
        if self.PDBID is not None:
            self._pdb = None
        self._gnm = {}
        self._anm = {}
        for env in ['chain', 'reduced', 'sliced']:
            self._gnm[env] = {chID: None for chID in self.chids}
            self._anm[env] = {chID: None for chID in self.chids}
        # write pickle
        with open(pickle_path, 'wb') as f:
            pickle.dump(self, f)
        # restore temporarily cached data
        self._pdb, self._gnm, self._anm = cache
        LOGGER.info("Pickle '{}' saved.".format(filename))
        return pickle_path
Example #24
def getWWPDBFTPServer():
    """Return a tuple containing name, host, and path of the currently 
    set `wwPDB <http://www.wwpdb.org/>`_ FTP server."""
    
    server = SETTINGS.get('wwpdb_ftp', None)
    if server is None:
        LOGGER.warning('A wwPDB FTP server is not set, default FTP server '
                       'RCSB PDB is used. Use `setWWPDBFTPServer` function '
                       'to set a server close to your location.')
        return _WWPDB_RCSB
    else:
        if server[2].endswith('data/structures/divided/pdb/'):
            return (server[0], server[1], 
                    server[2][:-len('data/structures/divided/pdb/')])
        else:
            return server
Example #25
 def recoverPickle(self, folder=None, filename=None, days=30, **kwargs):
     if folder is None:
         # define folder where to look for pickles
         folder = SETTINGS.get('rhapsody_local_folder', '.')
     if filename is None:
         # use the default filename, if possible
         if self.PDBID is not None:
             filename = 'PDBfeatures-' + self.PDBID + '.pkl'
         else:
             # when a custom structure is used, there is no
             # default filename: the user should provide it
             raise ValueError('Please provide a filename.')
     pickle_path = os.path.join(folder, filename)
     if not os.path.isfile(pickle_path):
         raise IOError("File '{}' not found".format(filename))
     with open(pickle_path, 'rb') as f:
         recovered_self = pickle.load(f)
     # check consistency of recovered data
     if self.PDBID is None:
         if self._pdb != recovered_self._pdb:
             raise ValueError(
                 'Incompatible PDB structure in recovered pickle.')
     elif self.PDBID != recovered_self.PDBID:
         raise ValueError(
             'PDBID in recovered pickle ({}) does not match.'.format(
                 recovered_self.PDBID))
     if self.n_modes != recovered_self.n_modes:
         raise ValueError(
             'Num. of modes in recovered pickle ({}) does not match.'.
             format(recovered_self.n_modes))
     # check timestamp and ignore pickles that are too old
     date_format = "%Y-%m-%d %H:%M:%S.%f"
     t_old = datetime.datetime.strptime(recovered_self.timestamp,
                                        date_format)
     t_now = datetime.datetime.utcnow()
     Delta_t = datetime.timedelta(days=days)
     if t_old + Delta_t < t_now:
         raise RuntimeError('Pickle was too old and was ignored.')
     # import recovered data
     self.chids = recovered_self.chids
     self.resids = recovered_self.resids
     self.feats = recovered_self.feats
     self._gnm = recovered_self._gnm
     self._anm = recovered_self._anm
     self.timestamp = recovered_self.timestamp
     LOGGER.info("Pickle '{}' recovered.".format(filename))
     return
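
The staleness test used above is self-contained; a minimal sketch of the same
pattern (is_stale is a hypothetical helper, not part of Rhapsody):

    import datetime

    def is_stale(timestamp, days=30):
        """Return True if *timestamp* is older than *days* days."""
        date_format = "%Y-%m-%d %H:%M:%S.%f"
        t_old = datetime.datetime.strptime(timestamp, date_format)
        return t_old + datetime.timedelta(days=days) < datetime.datetime.utcnow()

    print(is_stale('2020-01-01 00:00:00.000000'))  # -> True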
Example #26
def delNonstdAminoacid(resname):
    """Delete non-standard amino acid *resname*.
    
    >>> delNonstdAminoacid('PTR')
    >>> flagDefinition('nonstdaa') # doctest: +ELLIPSIS
    ['ASX', 'CSO', 'GLX', ..., 'TPO', 'XAA', 'XLE']
    
    Default set of non-standard amino acids can be restored as follows:
    
    >>> flagDefinition(reset='nonstdaa')"""

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    try:
        nonstd.pop(resname)
    except KeyError:
        raise ValueError('{0:s} is not a non-standard residue name'
                           .format(repr(resname)))
    else:
        updateNonstandard(nonstd)
Example #27
def delNonstdAminoacid(resname):
    """Delete non-standard amino acid *resname*.

    .. ipython:: python

       delNonstdAminoacid('PTR')
       flagDefinition('nonstdaa')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    try:
        nonstd.pop(resname)
    except KeyError:
        raise ValueError('{0} is not a non-standard residue name'
                         .format(repr(resname)))
    else:
        updateNonstandard(nonstd)
Example #28
def addNonstdAminoacid(resname, *properties):
    """Add non-standard amino acid *resname* with *properties* selected from:

      * {props}

    .. ipython:: python

       addNonstdAminoacid('PTR', 'acidic', 'aromatic', 'cyclic', 'large',
                          'polar', 'surface')

    Default set of non-standard amino acids can be restored as follows:

    .. ipython:: python

       flagDefinition(reset='nonstdaa')"""

    resname = str(resname)
    if len(resname) > 4:
        LOGGER.warn('Residue name {0} is unusually long.'.format(
            repr(resname)))
    propset = set(properties)
    for cat, val in CATEGORIES.items():
        intersection = val.intersection(propset)
        if intersection:
            if len(intersection) > 1:
                raise ValueError('amino acid properties {0} cannot be '
                                 'present together'.format(', '.join(
                                     [repr(prp) for prp in intersection])))
            for prop in intersection:
                propset.remove(prop)
    if propset:
        raise ValueError('amino acid property {0} is not valid'.format(
            repr(propset.pop())))

    nonstd = SETTINGS.get(NONSTANDARD_KEY, NONSTANDARD)
    nonstd[resname] = set(properties)
    updateNonstandard(nonstd)
Example #29
def parsePDBStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)""" 
    
    model = kwargs.get('model')
    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if isinstance(model, Integral):
            if model < 0:
                raise ValueError('model must be a non-negative integer')
        else:
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = chain + title_suffix
    ag = kwargs.pop('ag', None)
    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    biomol = kwargs.get('biomol', False)
    auto_secondary = None
    secondary = kwargs.get('secondary')
    if not secondary:
        auto_secondary = SETTINGS.get('auto_secondary')
        secondary = auto_secondary
    split = 0
    hd = None
    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            hd, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(),
                          ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        hd, split = getHeaderDict(stream)

    if ag is not None and isinstance(hd, dict):
        if secondary:
            if auto_secondary:
                try:
                    ag = assignSecstr(hd, ag)
                except ValueError:
                    pass
            else:
                ag = assignSecstr(hd, ag)
        if biomol:
            ag = buildBiomolecules(hd, ag)

            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if model != 0:
        if header:
            return ag, hd
        else:
            return ag
    else:
        return hd
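
Usage sketch (assuming parsePDBStream is importable from prody; the file name
is a placeholder, and any object with a readlines method works):

    from prody import parsePDBStream

    with open('1xyz.pdb') as stream:
        ag, header = parsePDBStream(stream, header=True)
    print(ag.numAtoms(), sorted(header))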
Example #30
def viewNMDinVMD(filename):
    """Start VMD in the current Python session and load NMD data."""

    vmd = SETTINGS.get("vmd")
    if vmd:
        os.system("{0:s} -e {1:s}".format(vmd, os.path.abspath(filename)))
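
Note that os.system blocks until VMD exits; a non-blocking alternative is
sketched below (paths are placeholders, not ProDy's implementation):

    import subprocess

    # Popen returns immediately, leaving VMD running in the background
    subprocess.Popen(['/usr/local/bin/vmd', '-e', 'modes.nmd'])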
Example #31
def resetDefinitions(flag):
    """Reset the definition of *flag* to factory defaults.  Pass ``'all'`` to
    reset all definitions, or ``'nonstdaa'`` to reset only the set of
    non-standard amino acids."""

    if flag == 'all':
        SETTINGS.pop(DEFINITIONS_KEY, None)
        SETTINGS.pop(NONSTANDARD_KEY, None)
        SETTINGS[TIMESTAMP_KEY] = int(time())
        SETTINGS.save()
        updateDefinitions()
    elif flag == 'nonstdaa': 
        SETTINGS.pop(NONSTANDARD_KEY, None)
        SETTINGS[TIMESTAMP_KEY] = int(time())
        SETTINGS.save()
        updateDefinitions()
    else:
        try:
            # pop only this flag from the stored definitions; popping
            # DEFINITIONS_KEY itself would discard all user definitions
            defs = SETTINGS.get(DEFINITIONS_KEY, {})
            defs.pop(flag)
            SETTINGS[DEFINITIONS_KEY] = defs
        except KeyError:
            pass
        else:
            SETTINGS[TIMESTAMP_KEY] = int(time())
            SETTINGS.save()
            updateDefinitions()
Example #32
def print_sat_mutagen_figure(filename,
                             rhapsody_obj,
                             res_interval=None,
                             PolyPhen2=True,
                             EVmutation=True,
                             extra_plot=None,
                             fig_height=8,
                             fig_width=None,
                             dpi=300,
                             min_interval_size=15,
                             html=False,
                             main_clsf='main',
                             aux_clsf='aux.'):

    # check inputs
    assert isinstance(filename, str), 'filename must be a string'
    assert isinstance(rhapsody_obj, Rhapsody), 'not a Rhapsody object'
    assert rhapsody_obj._isColSet('main score'), 'predictions not found'
    assert rhapsody_obj._isSaturationMutagenesis(), 'unable to create figure'
    if res_interval is not None:
        assert isinstance(res_interval, tuple) and len(res_interval) == 2, \
               'res_interval must be a tuple of 2 values'
        assert res_interval[1] >= res_interval[0], 'invalid res_interval'
    if extra_plot is not None:
        assert len(extra_plot) == rhapsody_obj.numSAVs, \
               'length of additional predictions array is incorrect'
    assert isinstance(fig_height, (int, float))
    assert isinstance(dpi, int)

    matplotlib = _try_import_matplotlib()
    if matplotlib is None:
        return

    # delete extension from filename
    filename = os.path.splitext(filename)[0]

    # make sure that all variants belong to the same Uniprot sequence
    accs = [s.split()[0] for s in rhapsody_obj.data['SAV coords']]
    if len(set(accs)) != 1:
        m = 'Only variants from a single Uniprot sequence can be accepted'
        raise ValueError(m)

    # select an appropriate interval, based on available predictions
    seq_pos = [int(s.split()[1]) for s in rhapsody_obj.data['SAV coords']]
    res_min = np.min(seq_pos)
    res_max = np.max(seq_pos)
    upper_lim = res_max + min_interval_size

    # create empty (20 x num_res) mutagenesis tables
    table_best = np.full((20, upper_lim), np.nan)
    table_main = table_best.copy()
    if extra_plot is not None:
        table_other = table_best.copy()
    if PolyPhen2:
        table_PP2 = table_best.copy()
    if EVmutation:
        table_EVmut = table_best.copy()

    # import pathogenicity probabilities from Rhapsody object
    p_best = rhapsody_obj.getPredictions(classifier='best')['path. prob.']
    p_main = rhapsody_obj.data['main path. prob.']
    if PolyPhen2:
        rhapsody_obj._calcPolyPhen2Predictions()
        p_PP2 = rhapsody_obj.data['PolyPhen-2 score']
    if EVmutation:
        rhapsody_obj._calcEVmutationPredictions()
        EVmut_score = np.array(rhapsody_obj.data['EVmutation score'])
        EVmut_cutoff = SETTINGS.get('EVmutation_metrics')['optimal cutoff']
        p_EVmut = -EVmut_score / EVmut_cutoff * 0.5

    # fill tables with predicted probability
    #  1:    deleterious
    #  0:    neutral
    # 'nan': no prediction/wt
    aa_list = 'ACDEFGHIKLMNPQRSTVWY'
    aa_map = {aa: i for i, aa in enumerate(aa_list)}
    for i, SAV in enumerate(rhapsody_obj.data['SAV coords']):
        aa_mut = SAV.split()[3]
        index = int(SAV.split()[1]) - 1
        table_best[aa_map[aa_mut], index] = p_best[i]
        table_main[aa_map[aa_mut], index] = p_main[i]
        if extra_plot is not None:
            table_other[aa_map[aa_mut], index] = extra_plot[i]
        if PolyPhen2:
            table_PP2[aa_map[aa_mut], index] = p_PP2[i]
        if EVmutation:
            table_EVmut[aa_map[aa_mut], index] = p_EVmut[i]

    # compute average pathogenicity profiles
    # NB: I expect to see RuntimeWarnings in this block
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        avg_p_best = np.nanmean(table_best, axis=0)
        avg_p_main = np.nanmean(table_main, axis=0)
        min_p = np.nanmin(table_best, axis=0)
        max_p = np.nanmax(table_best, axis=0)
        if extra_plot is not None:
            avg_p_other = np.nanmean(table_other, axis=0)
        if PolyPhen2:
            avg_p_PP2 = np.nanmean(table_PP2, axis=0)
        if EVmutation:
            avg_p_EVmut = np.nanmean(table_EVmut, axis=0)

    # use upper strip for showing additional info, such as PDB lengths
    upper_strip = np.full((1, upper_lim), np.nan)
    PDB_sizes = np.zeros(upper_lim, dtype=int)
    PDB_coords = [''] * upper_lim
    for s in rhapsody_obj.data:
        index = int(s['SAV coords'].split()[1]) - 1
        if s['PDB size'] != 0:
            PDB_length = int(s['PDB size'])
            PDBID_chain = ':'.join(s['PDB SAV coords'][0].split()[:2])
            upper_strip[0, index] = PDB_length
            PDB_sizes[index] = PDB_length
            PDB_coords[index] = PDBID_chain
    max_PDB_size = max(PDB_sizes)
    if max_PDB_size != 0:
        upper_strip[0, :] /= max_PDB_size

    # PLOT FIGURE

    from matplotlib import pyplot as plt
    from matplotlib import gridspec

    # portion of the sequence to display
    if res_interval is None:
        res_interval = (res_min, res_max)
    # adjust interval
    res_i, res_f = _adjust_res_interval(res_interval, upper_lim,
                                        min_interval_size)
    nres_shown = res_f - res_i + 1

    # figure proportions
    if fig_width is None:
        fig_width = fig_height / 2  # inches
        fig_width *= nres_shown / 20
    fig, ax = plt.subplots(3, 2, figsize=(fig_width, fig_height))
    wspace = 0.5  # inches
    plt.subplots_adjust(wspace=wspace / fig_width, hspace=0.15)

    # figure structure
    gs = gridspec.GridSpec(3,
                           2,
                           width_ratios=[nres_shown, 1],
                           height_ratios=[1, 20, 10])
    ax0 = plt.subplot(gs[0, 0])  # secondary structure strip
    ax1 = plt.subplot(gs[1, 0])  # mutagenesis table
    axcb = plt.subplot(gs[1, 1])  # colorbar
    ax2 = plt.subplot(gs[2, 0])  # average profile

    # padding for tick labels
    pad = 0.2 / fig_width

    # top strip
    matplotlib.cm.YlGn.set_bad(color='antiquewhite')
    ax0.imshow(upper_strip[0:1, res_i - 1:res_f],
               aspect='auto',
               cmap='YlGn',
               vmin=0,
               vmax=1)
    ax0.set_ylim((-0.45, .45))
    ax0.set_yticks([])
    ax0.set_ylabel(f'PDB size \n[0-{max_PDB_size} res] ',
                   fontsize=14,
                   ha='right',
                   va='center',
                   rotation=0)
    ax0.set_xticks(np.arange(5 - res_i % 5, res_f - res_i + 1, 5))
    ax0.set_xticklabels([])
    # add white grid
    ax0.set_xticks(np.arange(-.5, res_f - res_i + 1, 1), minor=True)
    ax0.tick_params(axis='both', which='minor', length=0)
    ax0.grid(which='minor', color='w', linestyle='-', linewidth=.5)

    # mutagenesis table (heatmap)
    matplotlib.cm.coolwarm.set_bad(color='#F9E79F')
    im = ax1.imshow(table_best[:, res_i - 1:res_f],
                    aspect='auto',
                    cmap='coolwarm',
                    vmin=0,
                    vmax=1)
    axcb.figure.colorbar(im, cax=axcb)
    ax1.set_yticks(np.arange(len(aa_list)))
    ax1.set_yticklabels(aa_list, ha='center', position=(-pad, 0), fontsize=14)
    ax1.set_xticks(np.arange(5 - res_i % 5, res_f - res_i + 1, 5))
    ax1.set_xticklabels([])
    ax1.set_ylabel('pathog. probability', labelpad=10)
    # add white grid
    ax1.set_xticks(np.arange(-.5, res_f - res_i + 1, 1), minor=True)
    ax1.set_yticks(np.arange(-.5, 20, 1), minor=True)
    ax1.tick_params(axis='both', which='minor', length=0)
    ax1.grid(which='minor', color='w', linestyle='-', linewidth=.5)

    # average pathogenicity profile
    x_resids = np.arange(1, upper_lim + 1)
    # shading showing range of values
    # NB: a bug in pyplot.fill_between() arises when selecting a region with
    # set_xlim() in a large plot (e.g. > 1000), causing the shaded area to
    # be plotted even though it's outside the selected region. As a workaround,
    # here I slice the plot to fit the selected region.
    sl = slice(max(0, res_i - 2), min(res_f + 2, upper_lim + 1))
    ax2.fill_between(x_resids[sl],
                     min_p[sl],
                     max_p[sl],
                     alpha=0.5,
                     edgecolor='salmon',
                     facecolor='salmon')
    # plot average profile for other predictions, if available
    if extra_plot is not None:
        ax2.plot(x_resids, avg_p_other, color='gray', lw=1)
    if PolyPhen2:
        ax2.plot(x_resids, avg_p_PP2, color='blue', lw=1)
    if EVmutation:
        ax2.plot(x_resids, avg_p_EVmut, color='green', lw=1)
    # solid line for predictions obtained with full classifier
    ax2.plot(x_resids, avg_p_main, 'ro-')
    # dotted line for predictions obtained with auxiliary classifier
    ax2.plot(x_resids, avg_p_best, 'ro-', markerfacecolor='none', ls='dotted')
    # cutoff line
    ax2.axhline(y=0.5, color='grey', lw=.8, linestyle='dashed')

    ax2.set_xlim((res_i - .5, res_f + .5))
    ax2.set_xlabel('residue number')
    ax2.set_ylim((-0.05, 1.05))
    ax2.set_ylabel('average', rotation=90, labelpad=10)
    ax2.set_yticklabels([])
    ax2r = ax2.twinx()
    ax2r.set_ylim((-0.05, 1.05))
    ax2r.set_yticks([0, .5, 1])
    ax2r.set_yticklabels(['0', '0.5', '1'])
    ax2r.tick_params(axis='both', which='major', pad=15)

    tight_padding = 0.1
    fig.savefig(filename + '.png',
                format='png',
                bbox_inches='tight',
                pad_inches=tight_padding,
                dpi=dpi)
    plt.close()
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Saturation mutagenesis figure saved to {filename}.png')

    # write a map in html format, to make figure clickable
    if html:
        all_axis = {'strip': ax0, 'table': ax1, 'bplot': ax2}

        # precompute some useful quantities for html code
        html_data = {}
        # dpi of printed figure
        html_data["dpi"] = dpi
        # figure size *before* tight
        html_data["fig_size"] = fig.get_size_inches()
        # tight bbox as used by fig.savefig()
        html_data["tight_bbox"] = fig.get_tightbbox(fig.canvas.get_renderer())
        # compute new origin and height, based on tight box and padding
        html_data["new_orig"] = html_data["tight_bbox"].min - tight_padding
        html_data["new_height"] = (html_data["tight_bbox"].height +
                                   2 * tight_padding)

        def get_area_coords(ax, d, ax_type):
            assert ax_type in ("strip", "table", "bplot")
            # get bbox coordinates (x0, y0, x1, y1)
            bbox = ax.get_position().get_points()
            # get bbox coordinates in inches
            b_inch = bbox * d["fig_size"]
            # adjust bbox coordinates based on tight bbox
            b_adj = b_inch - d["new_orig"]
            # use html reference system (y = 1 - y)
            b_html = b_adj * np.array([1, -1]) + np.array([0, d["new_height"]])
            # convert to pixels
            b_px = (d["dpi"] * b_html).astype(int)
            b_px = np.sort(b_px, axis=0)
            # put in html format
            coords = '{},{},{},{}'.format(*b_px.flatten())
            # output
            return coords

        # html templates
        area_html = Template('<area shape="rect" coords="$coords" '
                             'id="{{map_id}}_$areaid" {{area_attrs}}> \n')

        # write html
        with open(filename + '.html', 'w') as f:
            f.write('<div>\n')
            f.write('<map name="{{map_id}}" id="{{map_id}}" {{map_attrs}}>\n')
            for ax_type, ax in all_axis.items():
                fields = {'areaid': ax_type}
                fields['coords'] = get_area_coords(ax, html_data, ax_type)
                f.write(area_html.substitute(fields))
            f.write('</map>\n')
            f.write('</div>\n')

        # populate info table that will be passed as a javascript variable
        best_preds = rhapsody_obj.getPredictions()
        best_avg_preds = rhapsody_obj.getResAvgPredictions()
        PDB_coords = rhapsody_obj.getPDBcoords()
        abbrev = {
            '?': '?',
            'deleterious': 'del',
            'neutral': 'neu',
            'prob.delet.': 'p.del',
            'prob.neutral': 'p.neu'
        }
        info = {}
        for k in ['strip', 'table', 'bplot']:
            n_cols = 20 if k == 'table' else 1
            info[k] = [[''] * nres_shown for i in range(n_cols)]
        for i, row in enumerate(rhapsody_obj.data):
            SAV = row['SAV coords']
            acc, resid, aa_wt, aa_mut = SAV.split()
            resid = int(resid)
            # consider only residues shown in figure
            if not (res_i <= resid <= res_f):
                continue
            # SAV coordinates
            SAV_code = f'{aa_wt}{resid}{aa_mut}'
            # coordinates on table
            t_i = aa_map[aa_mut]
            t_j = resid - 1
            # coordinates on *shown* table
            ts_i = t_i
            ts_j = resid - res_i
            # compose message for table
            bp = best_preds[i]
            pprob = bp['path. prob.']
            pclass = bp['path. class']
            clsf = main_clsf if row['best classifier'] == 'main' else aux_clsf
            m = f'{SAV_code}: Rhapsody-{clsf} = {pprob:<3.2f} ({pclass})'
            if PolyPhen2:
                score = bp['PolyPhen-2 score']
                pclass = abbrev[bp['PolyPhen-2 path. class']]
                m += f', PolyPhen-2 = {score:<3.2f} ({pclass})'
            if EVmutation:
                score = bp['EVmutation score']
                pclass = abbrev[bp['EVmutation path. class']]
                m += f', EVmutation = {score:<3.2f} ({pclass})'
            if extra_plot is not None:
                score = table_other[t_i, t_j]
                m += f', other = {score:<3.2f}'
            info['table'][ts_i][ts_j] = m
            info['table'][aa_map[aa_wt]][ts_j] = f'{SAV_code[:-1]}: wild-type'
            if i % 19 == 0:
                # compose message for upper strip
                PDBID, ch, resid, aa, size = PDB_coords[i][[
                    'PDBID', 'chain', 'resid', 'resname', 'PDB size'
                ]]
                if size > 0:
                    m = f'{PDBID}:{ch}, resid {resid}, aa {aa}, size {size}'
                else:
                    m = 'no PDB found'
                info['strip'][0][ts_j] = m
                # compose message for bottom plot (residue-averages)
                bap = best_avg_preds[int(i / 19)]
                pprob = bap['path. prob.']
                pcl = bap['path. class']
                m = f'{SAV_code[:-1]}: Rhapsody-{clsf} = {pprob:<3.2f} ({pcl})'
                if PolyPhen2:
                    score = bap['PolyPhen-2 score']
                    pcl = abbrev[bap['PolyPhen-2 path. class']]
                    m += f', PolyPhen-2 = {score:<3.2f} ({pcl})'
                if EVmutation:
                    score = bap['EVmutation score']
                    pcl = abbrev[bap['EVmutation path. class']]
                    m += f', EVmutation = {score:<3.2f} ({pcl})'
                if extra_plot is not None:
                    score = avg_p_other[t_j]
                    m += f', other = {score:<3.2f}'
                info['bplot'][0][ts_j] = m

        def create_info_msg(ax_type, d):
            text = '[ \n'
            for row in d:
                text += '  ['
                for m in row:
                    text += f'"{m}",'
                text += '], \n'
            text += ']'
            return text

        area_js = Template('{{map_data}}["{{map_id}}_$areaid"] = { \n'
                           '  "img_id": "{{img_id}}", \n'
                           '  "map_id": "{{map_id}}", \n'
                           '  "coords": [$coords], \n'
                           '  "num_rows": $num_rows, \n'
                           '  "num_cols": $num_cols, \n'
                           '  "info_msg": $info_msg, \n'
                           '}; \n')

        # dump info in javascript format
        with open(filename + '.js', 'w') as f:
            f.write('var {{map_data}} = {}; \n')
            for ax_type, d in info.items():
                fields = {'areaid': ax_type}
                fields['coords'] = get_area_coords(all_axis[ax_type],
                                                   html_data, ax_type)
                fields['num_rows'] = 20 if ax_type == 'table' else 1
                fields['num_cols'] = nres_shown
                fields['info_msg'] = create_info_msg(ax_type, d)
                f.write(area_js.substitute(fields))

        return info
    return
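
The inches-to-pixels conversion inside get_area_coords() is compact but easy to misread. The following standalone sketch reproduces the same transform with made-up figure dimensions (all names and values here are illustrative, not part of the original function):

import numpy as np

fig_size = np.array([10.0, 8.0])   # figure width/height in inches
dpi = 300
new_orig = np.array([0.3, 0.2])    # lower-left of tight bbox minus padding (in)
new_height = 7.5                   # tight bbox height plus 2*padding (in)

# axes bbox in figure-fraction units: [[x0, y0], [x1, y1]]
bbox = np.array([[0.1, 0.1], [0.9, 0.8]])
b_inch = bbox * fig_size                 # fractions -> inches
b_adj = b_inch - new_orig                # shift into the cropped (tight) figure
b_html = b_adj * np.array([1, -1]) + np.array([0, new_height])  # flip y axis
b_px = np.sort((dpi * b_html).astype(int), axis=0)              # -> pixels
print('coords="{},{},{},{}"'.format(*b_px.flatten()))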
Example No. 41
def updateNonstandard(nonstd):
    """Store nonstandard residue definitions in ProDy settings, stamp the
    change, and refresh flag definitions."""
    SETTINGS[NONSTANDARD_KEY] = nonstd
    SETTINGS[TIMESTAMP_KEY] = int(time())
    SETTINGS.save()
    updateDefinitions()
Example No. 42
    def recoverPickle(self, folder=None, filename=None, days=30, **kwargs):
        """Looks for precomputed pickle for the current PDB structure.

        :arg folder: path of folder where pickles are stored. If not specified,
            pickles will be searched for in the local Rhapsody installation
            folder.
        :type folder: str
        :arg filename: name of the pickle. If not specified, the default
            filename ``'PDBfeatures-[PDBID].pkl'`` will be used. If a PDBID
            is not found, the user must specify a valid filename.
        :type filename: str
        :arg days: number of days after which a pickle will be considered too
            old and won't be recovered.
        :type days: int
        """
        if folder is None:
            # define folder where to look for pickles
            folder = SETTINGS.get('rhapsody_local_folder')
            if folder is None:
                folder = '.'
            else:
                folder = os.path.join(folder, 'pickles')
        if filename is None:
            # use the default filename, if possible
            if self.PDBID is not None:
                filename = 'PDBfeatures-' + self.PDBID + '.pkl'
            else:
                # when a custom structure is used, there is no
                # default filename: the user should provide it
                raise ValueError('Please provide a filename.')
        pickle_path = os.path.join(folder, filename)
        if not os.path.isfile(pickle_path):
            raise IOError("File '{}' not found".format(filename))
        with open(pickle_path, 'rb') as inp:
            recovered_self = pickle.load(inp)
        # check consistency of recovered data
        if self.PDBID is None:
            if self._pdb != recovered_self._pdb:
                raise ValueError('Incompatible PDB structure in recovered pickle.')
        elif self.PDBID != recovered_self.PDBID:
            raise ValueError('PDBID in recovered pickle ({}) does not match.'
                             .format(recovered_self.PDBID))
        if self.n_modes != recovered_self.n_modes:
            raise ValueError('Num. of modes in recovered pickle ({}) does not match.'
                             .format(recovered_self.n_modes))
        # check timestamp and ignore pickles that are too old
        date_format = "%Y-%m-%d %H:%M:%S.%f"
        t_old = datetime.datetime.strptime(
            recovered_self.timestamp, date_format)
        t_now = datetime.datetime.utcnow()
        Delta_t = datetime.timedelta(days=days)
        if t_old + Delta_t < t_now:
            raise RuntimeError('Pickle was too old and was ignored.')
        # import recovered data
        self.chids = recovered_self.chids
        self.resids = recovered_self.resids
        self.feats = recovered_self.feats
        self._gnm = recovered_self._gnm
        self._anm = recovered_self._anm
        self.timestamp = recovered_self.timestamp
        LOGGER.info("Pickle '{}' recovered.".format(filename))
        return
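
A minimal usage sketch for recoverPickle() (the PDBfeatures constructor call below is assumed for illustration; only the method itself is shown in this example):

# hypothetical usage: recover a cached pickle no older than one week,
# falling back to recomputation when none is usable
pdbf = PDBfeatures('1UBQ')  # assumed constructor, for illustration only
try:
    pdbf.recoverPickle(days=7)
except (IOError, ValueError, RuntimeError) as err:
    print('No usable pickle: {}'.format(err))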
Example No. 43
    def getResAvgPredictions(self,
                             resid=None,
                             classifier='best',
                             PolyPhen2=True,
                             EVmutation=True,
                             refresh=False):
        """Return residue-averaged predictions from a saturation mutagenesis
        scan as a NumPy structured array with one record per residue, or a
        single record if *resid* is given. Returns **None** if the scan is
        not a saturation mutagenesis."""
        if not self._isSaturationMutagenesis():
            return None
        # initialize output array
        cols = [('sequence index', 'i4'), ('PDB SAV coords', 'U100'),
                ('PDBID', 'U100'), ('chain', 'U1'), ('resid', 'i4'),
                ('resname', 'U1'), ('PDB size', 'i4'), ('score', 'f4'),
                ('path. prob.', 'f4'), ('path. class', 'U12')]
        if PolyPhen2:
            cols.extend([('PolyPhen-2 score', 'f4'),
                         ('PolyPhen-2 path. class', 'U12')])
        if EVmutation:
            cols.extend([('EVmutation score', 'f4'),
                         ('EVmutation path. class', 'U12')])
        # one record per residue (19 SAVs per residue)
        output = np.empty(int(self.numSAVs / 19), dtype=np.dtype(cols))
        # fetch unique SAV coords, PDB coords and predictions
        uSAVc = self.getUniqueSAVcoords()
        PDBc = self.getPDBcoords()
        preds = self.getPredictions(classifier=classifier,
                                    PolyPhen2=PolyPhen2,
                                    EVmutation=EVmutation,
                                    refresh=refresh)
        # compute residue-averaged quantities
        output['sequence index'] = self._calcResAvg(uSAVc['position'])
        for field in [
                'PDB SAV coords', 'PDBID', 'chain', 'resid', 'resname',
                'PDB size'
        ]:
            output[field] = self._calcResAvg(PDBc[field])
        # NB: RuntimeWarnings are expected here (e.g. all-NaN averages)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            output['score'] = self._calcResAvg(preds['score'])
            pp = self._calcResAvg(preds['path. prob.'])
            pc = np.where(pp > 0.5, 'deleterious', 'neutral')
            pc = np.where(np.isnan(pp), '?', pc)
            output['path. prob.'] = pp
            output['path. class'] = pc
            if PolyPhen2:
                ps = self._calcResAvg(preds['PolyPhen-2 score'])
                pc = np.where(ps > 0.5, 'deleterious', 'neutral')
                pc = np.where(np.isnan(ps), '?', pc)
                output['PolyPhen-2 score'] = ps
                output['PolyPhen-2 path. class'] = pc
            if EVmutation:
                ps = self._calcResAvg(preds['EVmutation score'])
                cutoff = -SETTINGS.get('EVmutation_metrics')['optimal cutoff']
                pc = np.where(ps < cutoff, 'deleterious', 'neutral')
                pc = np.where(np.isnan(ps), '?', pc)
                output['EVmutation score'] = ps
                output['EVmutation path. class'] = pc
        if resid is None:
            return output
        elif isinstance(resid, int):
            return output[output['resid'] == resid][0]
        else:
            raise ValueError('Invalid resid.')
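
A short usage sketch, assuming rh is a Rhapsody object on which a saturation mutagenesis scan has already completed (hypothetical setup; the field names match the cols defined above):

avg = rh.getResAvgPredictions()  # structured array, one record per residue
if avg is not None:
    for rec in avg[:5]:
        print(rec['resid'], rec['path. prob.'], rec['path. class'])
    # a single residue can also be looked up by its PDB residue number
    print(rh.getResAvgPredictions(resid=42)['path. class'])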
Example No. 44
    def calcEvolProperties(self,
                           resid='all',
                           refresh=False,
                           folder=None,
                           max_cols=None,
                           max_seqs=25000,
                           **kwargs):
        """Compute Evol properties (Shannon entropy, mutual information and
        direct information) from Pfam multiple sequence alignments, for a
        given residue."""
        assert type(refresh) is bool
        # recover Pfam mapping (if not found already)
        self._searchPfam(refresh=refresh)
        if resid == 'all':
            PF_list = self.Pfam.keys()
        else:
            # get list of Pfam domains containing resid
            PF_list = [
                k for k in self.Pfam if any([
                    resid >= int(segment['start'])
                    and resid <= int(segment['end'])
                    for segment in self.Pfam[k]['locations']
                ])
            ]
            if len(PF_list) == 0:
                raise RuntimeError(
                    'No Pfam domain for resid {}.'.format(resid))
            if len(PF_list) > 1:
                LOGGER.warn('Residue {} is found in multiple ({}) Pfam '
                            'domains.'.format(resid, len(PF_list)))
        if folder is None:
            folder = SETTINGS.get('rhapsody_local_folder', './')
        # NB: 'folder' and 'max_cols' are currently unused: they belonged to
        # a disabled code path that cached the downloaded MSA and checked
        # its number of columns
        # iterate over Pfam families
        for PF in PF_list:
            d = self.Pfam[PF]
            # skip if properties are pre-computed
            if not refresh and d.get('mapping') is not None:
                continue
            d['mapping'] = None
            d['ref_MSA'] = None
            d['entropy'] = np.nan
            d['MutInfo'] = np.nan
            d['DirInfo'] = np.nan
            try:
                LOGGER.info('Processing {}...'.format(PF))
                # fetch & parse MSA without saving the downloaded file
                f = fetchPfamMSA(PF)
                msa = parseMSA(f, **kwargs)
                os.remove(f)
                # slice MSA to match all segments of the Uniprot sequence
                sliced_msa, indexes = self._sliceMSA(msa)
                # get mapping between Uniprot sequence and Pfam domain
                d['mapping'] = self._mapUniprot2Pfam(PF, sliced_msa, indexes)
            except Exception as e:
                LOGGER.warn('{}: {}'.format(PF, e))
                d['mapping'] = str(e)
                continue
            try:
                # refine MSA ('seqid' param. is set as in PolyPhen-2)
                rowocc = 0.6
                while True:
                    sliced_msa = refineMSA(sliced_msa, rowocc=rowocc)
                    rowocc += 0.02
                    if sliced_msa.numSequences() <= max_seqs or rowocc >= 1:
                        break
                ref_msa = refineMSA(sliced_msa, seqid=0.94, **kwargs)
                d['ref_MSA'] = ref_msa
                # compute evolutionary properties
                d['entropy'] = calcShannonEntropy(ref_msa)
                d['MutInfo'] = buildMutinfoMatrix(ref_msa)
                # d['DirInfo'] = buildDirectInfoMatrix(ref_msa)  # disabled
            except Exception as e:
                LOGGER.warn('{}: {}'.format(PF, e))
        return {k: self.Pfam[k] for k in PF_list}
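
A usage sketch for calcEvolProperties(), assuming seq_obj (a hypothetical name) is an instance of the class this method belongs to, with a valid Pfam mapping:

# compute Pfam-based evolutionary properties for residue 135
props = seq_obj.calcEvolProperties(resid=135)
for pfam_id, d in props.items():
    # 'entropy' and 'MutInfo' remain NaN when the computation failed
    print(pfam_id, d['mapping'] is not None, d['ref_MSA'] is not None)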
Example No. 45
def recoverEVmutFeatures(SAVs):
    """Compute EVmutation features by fetching precomputed scores from the
    downloaded local folder. If multiple values are found for a given variant,
    the average will be taken.

    :arg SAVs: list of SAV coordinates, e.g. ``'P17516 135 G E'``.
    :type SAVs: list or tuple of strings
    :return: an array of EVmutation features for each SAV
    :rtype: NumPy structured array
    """
    LOGGER.timeit('_EVmut')
    LOGGER.info('Recovering EVmutation data...')

    # extracts precomputed EVmutation scores for given mutants
    # NB:
    # negative DeltaE_epist --> deleterious effect
    # DeltaE_epist == 0     --> neutral effect (wild-type)
    # positive DeltaE_epist --> neutral/benign effect

    def find_matching_files(file_list, acc, pos):
        match_files = []
        for fname in [f for f in file_list if f.startswith(acc)]:
            basename = splitext(fname)[0]
            res_range = basename.split("_")[-1]
            res_i = int(res_range.split("-")[0])
            res_f = int(res_range.split("-")[1])
            if res_i <= int(pos) <= res_f:
                match_files.append(fname)
        return match_files

    feat_dtype = np.dtype([(f, 'f') for f in EVMUT_FEATS])
    features = np.zeros(len(SAVs), dtype=feat_dtype)
    features[:] = np.nan

    # recover EVmutation data
    EVmut_dir = SETTINGS.get('EVmutation_local_folder')
    if EVmut_dir is None:
        raise RuntimeError('EVmutation folder not set')
    file_list = [basename(f) for f in glob(join(EVmut_dir, '*.csv'))]
    if not file_list:
        raise RuntimeError('EVmutation folder does not contain any .csv files')
    for i, SAV in enumerate(SAVs):
        acc, pos, wt_aa, mut_aa = SAV.split()
        pos = int(pos)
        # find files containing given SAV coordinates
        match_files = find_matching_files(file_list, acc, pos)
        # recover data and average them if multiple values are found
        mutant = f'{wt_aa}{pos}{mut_aa}'
        data = []
        for fname in match_files:
            with open(join(EVmut_dir, fname), 'r') as f:
                for line in f:
                    if line.startswith(mutant):
                        ll = line.strip().split(';')[4:8]
                        data.append(ll)
                        break
        data = np.array(data, dtype=float)
        if len(data) == 0:
            # no precomputed EVmutation score found for this SAV: leave NaNs
            continue
        features[i] = tuple(np.mean(data, axis=0))

    LOGGER.report('EVmutation scores recovered in %.1fs.', '_EVmut')
    return features
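
A usage sketch for recoverEVmutFeatures(), assuming the 'EVmutation_local_folder' setting points at a folder of downloaded .csv files (the SAV strings follow the format given in the docstring):

SAVs = ['P17516 135 G E', 'P17516 136 A T']
feats = recoverEVmutFeatures(SAVs)
print(feats.dtype.names)  # the fields listed in EVMUT_FEATS
print(feats[0])           # NaNs where no precomputed score was found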
Example No. 46
def updateDefinitions():
    """Update definitions and set some global variables.  This function must be
    called at the end of the module."""

    global DEFINITIONS, AMINOACIDS, BACKBONE, TIMESTAMP
    DEFINITIONS = {}
    user = SETTINGS.get('flag_definitions', {})

    # nucleics
    nucleic = set()
    for key in ['nucleobase', 'nucleoside', 'nucleotide']:
        aset = set(user.get(key, DEFAULTS[key]))
        nucleic.update(aset)
        DEFINITIONS[key] = aset
    DEFINITIONS['nucleic'] = nucleic

    # heteros
    for key in [
            'water', 'lipid', 'ion', 'sugar', 'heme', 'at', 'cg', 'purine',
            'pyrimidine'
    ]:
        DEFINITIONS[key] = set(user.get(key, DEFAULTS[key]))

    DEFINITIONS['backbone'] = DEFINITIONS['bb'] = set(
        user.get('bb', DEFAULTS['bb']))
    DEFINITIONS['backbonefull'] = DEFINITIONS['bbfull'] = set(
        user.get('bbfull', DEFAULTS['bbfull']))

    # element regex
    for key in ['hydrogen', 'carbon', 'nitrogen', 'oxygen', 'sulfur']:
        DEFINITIONS[key] = recompile(user.get(key, DEFAULTS[key]))

    try:
        nonstd = SETTINGS[NONSTANDARD_KEY]

    except KeyError:
        nonstd = NONSTANDARD
        DEFINITIONS.update(CATEGORIZED)
    else:

        for cat in CATEGORIES:
            for key in CATEGORIES[cat]:
                DEFINITIONS[key] = set(DEFAULTS[key])

        DEFINITIONS['charged'] = set(DEFINITIONS['acidic'])
        DEFINITIONS['charged'].update(DEFINITIONS['basic'])

        for resi, props in nonstd.items():
            for prop in props:
                DEFINITIONS[prop].add(resi)

    DEFINITIONS['stdaa'] = DEFAULTS['stdaa']
    DEFINITIONS['nonstdaa'] = set(nonstd)
    AMINOACIDS = set(DEFINITIONS['stdaa'])
    AMINOACIDS.update(DEFINITIONS['nonstdaa'])
    DEFINITIONS['protein'] = DEFINITIONS['aminoacid'] = AMINOACIDS

    BACKBONE = DEFINITIONS['bb']

    TIMESTAMP = SETTINGS.get(TIMESTAMP_KEY, 0)
Example No. 47
def resetDefinitions(flag):
    """Reset flag definitions. Use ``'all'`` to reset everything,
    ``'nonstdaa'`` to reset only nonstandard amino acids, or pass a single
    flag name to restore its default definition."""

    if flag == 'all':
        SETTINGS.pop(DEFINITIONS_KEY, None)
        SETTINGS.pop(NONSTANDARD_KEY, None)
        SETTINGS[TIMESTAMP_KEY] = int(time())
        SETTINGS.save()
        updateDefinitions()
    elif flag == 'nonstdaa':
        SETTINGS.pop(NONSTANDARD_KEY, None)
        SETTINGS[TIMESTAMP_KEY] = int(time())
        SETTINGS.save()
        updateDefinitions()
    else:
        try:
            SETTINGS.get(DEFINITIONS_KEY, {}).pop(flag)
        except KeyError:
            pass
        else:
            SETTINGS[TIMESTAMP_KEY] = int(time())
            SETTINGS.save()
            updateDefinitions()
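
For example (a minimal sketch; each call persists the change via SETTINGS.save() and refreshes the definitions):

resetDefinitions('water')     # restore the default 'water' definition only
resetDefinitions('nonstdaa')  # reset nonstandard amino acids
resetDefinitions('all')       # reset every custom definition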
Example No. 48
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will only contain data that is present in the
    XML file, and not all Ligand Expo XML files contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be a short (at most 4 characters) '
                         'alphanumeric identifier or a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            path = None
        url = ('http://files.rcsb.org/ligands/download/{0}.xml'
               .format(cci.upper()))
        if not xml:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                # NB: 'folder' is defined only when ligand_xml_save is set,
                # so save the user-requested copy in the current directory
                out = openFile(filename, mode='w')
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.ElementTree as ET  # cElementTree was removed in Python 3.9

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}')+1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    fw = dict_.get('formula_weight')
    if fw is not None:
        dict_['formula_weight'] = float(fw)

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                       for audit in
                       list(root.find(ns + 'pdbx_chem_comp_auditCategory'))]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
Example No. 50
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will only contain data that is present in the
    XML file, and not all Ligand Expo XML files contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be a short (at most 4 characters) '
                         'alphanumeric identifier or a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            path = None
        url = 'http://files.rcsb.org/ligands/download/{0}.xml'.format(
            cci.upper())
        if not xml:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError(
                    'XML file for ligand {0} is not found online'.format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                # NB: 'folder' is defined only when ligand_xml_save is set,
                # so save the user-requested copy in the current directory
                out = openFile(filename, mode='w')
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.ElementTree as ET  # cElementTree was removed in Python 3.9

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    fw = dict_.get('formula_weight')
    if fw is not None:
        dict_['formula_weight'] = float(fw)

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results is not None:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [
        (audit.get('action_type'), audit.get('date'))
        for audit in list(root.find(ns + 'pdbx_chem_comp_auditCategory'))
    ]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
Example No. 51
def calcEVmutPathClasses(EVmut_score):
    """Classify EVmutation scores as 'deleterious' or 'neutral' based on the
    optimal cutoff stored in ProDy settings; NaN scores are labelled '?'."""
    c = -SETTINGS.get('EVmutation_metrics')['optimal cutoff']
    EVmut_class = np.where(EVmut_score < c, 'deleterious', 'neutral')
    EVmut_class[np.isnan(EVmut_score)] = '?'
    return EVmut_class
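
A minimal check of the classification rule, with made-up scores (the actual threshold comes from the 'EVmutation_metrics' entry in the settings; assuming the stored 'optimal cutoff' is positive, scores below its negation are labelled 'deleterious'):

import numpy as np
scores = np.array([-8.2, -0.1, 3.4, np.nan])
print(calcEVmutPathClasses(scores))
# e.g. ['deleterious' 'neutral' 'neutral' '?'] for a cutoff around 5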