Ejemplo n.º 1
0
def parsePDB(pdb, **kwargs):
    """Return an :class:`.AtomGroup` and/or dictionary containing header data 
    parsed from a PDB file. 
    
    This function extends :func:`.parsePDBStream`.
    
    |example| See :ref:`parsepdb` for a detailed example.
    
    :arg pdb: a PDB identifier or a filename
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    """
    
    title = kwargs.get('title', None)
    if not os.path.isfile(pdb):
        if len(pdb) == 4 and pdb.isalnum():
            if title is None:
                title = pdb
                kwargs['title'] = title
            filename = fetchPDB(pdb)
            if filename is None:
                raise IOError('PDB file for {0:s} could not be downloaded.'
                              .format(pdb))
            pdb = filename
        else:
            raise IOError('{0:s} is not a valid filename or a valid PDB '
                          'identifier.'.format(pdb))
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        kwargs['title'] = title
    pdb = openFile(pdb)
    result = parsePDBStream(pdb, **kwargs)
    pdb.close()
    return result
Ejemplo n.º 2
0
def _parsePDB(pdb, **kwargs):
    title = kwargs.get('title', None)
    chain = ''
    if not os.path.isfile(pdb):
        pdb, chain = _getPDBid(pdb)
        if title is None:
            title = pdb
            kwargs['title'] = title
        filename = fetchPDB(pdb, **kwargs)
        if filename is None:
            raise IOError(
                'PDB file for {0} could not be downloaded.'.format(pdb))
        pdb = filename
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    pdb = openFile(pdb, 'rt')
    if chain != '':
        kwargs['chain'] = chain
    result = parsePDBStream(pdb, **kwargs)
    pdb.close()
    return result
Ejemplo n.º 3
0
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory."""

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        assert isinstance(sqid, int), 'sqid must be an integer' 
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'
                             .format(sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]
    global PDB_CLUSTERS_UPDATE_WARNING
    for sqid in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH, 
                                'bc-{0}.out.gz'.format(sqid))
        if not os.path.isfile(filename):
            fetchPDBClusters(sqid)
            
        if PDB_CLUSTERS_UPDATE_WARNING:
            import time
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning('PDB sequence clusters are {0:.1f} week(s) old,'
                               ' call `fetchPDBClusters` to receive updates.'
                               .format(diff))
                PDB_CLUSTERS_UPDATE_WARNING = False
        inp = openFile(filename)
        PDB_CLUSTERS[sqid] = inp.read()
        inp.close()
Ejemplo n.º 4
0
def _parsePDB(pdb, **kwargs):
    title = kwargs.get('title', None)
    chain = ''
    if not os.path.isfile(pdb):
        pdb, chain = _getPDBid(pdb)
        if title is None:
            title = pdb
            kwargs['title'] = title
        filename = fetchPDB(pdb, **kwargs)
        if filename is None:
            raise IOError('PDB file for {0} could not be downloaded.'
                          .format(pdb))
        pdb = filename
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    pdb = openFile(pdb, 'rt')
    if chain != '':
        kwargs['chain'] = chain
    result = parsePDBStream(pdb, **kwargs)
    pdb.close()
    return result
Ejemplo n.º 5
0
def loadPDBClusters(sqid=None):
    """Load previously fetched PDB sequence clusters from disk to memory."""

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if sqid is None:
        sqid_list = list(PDB_CLUSTERS)
        LOGGER.info('Loading all PDB sequence clusters.')
    else:
        assert isinstance(sqid, int), 'sqid must be an integer'
        if sqid not in PDB_CLUSTERS:
            raise ValueError('PDB cluster data is not available for sequence '
                             'identity {0}%, try one of {1}'.format(
                                 sqid, PDB_CLUSTERS_SQID_STR))
        LOGGER.info('Loading PDB sequence clusters for sequence identity '
                    '{0}.'.format(sqid))
        sqid_list = [sqid]
    global PDB_CLUSTERS_UPDATE_WARNING
    for sqid in sqid_list:
        filename = os.path.join(PDB_CLUSTERS_PATH,
                                'bc-{0}.out.gz'.format(sqid))
        if not os.path.isfile(filename):
            fetchPDBClusters(sqid)

        if PDB_CLUSTERS_UPDATE_WARNING:
            import time
            diff = (time.time() - os.path.getmtime(filename)) / 604800.
            if diff > 1.:
                LOGGER.warning(
                    'PDB sequence clusters are {0:.1f} week(s) old,'
                    ' call `fetchPDBClusters` to receive updates.'.format(
                        diff))
                PDB_CLUSTERS_UPDATE_WARNING = False
        inp = openFile(filename)
        PDB_CLUSTERS[sqid] = inp.read()
        inp.close()
Ejemplo n.º 6
0
def saveEnsemble(ensemble, filename=None, **kwargs):
    """Save *ensemble* model data as :file:`filename.ens.npz`.  If *filename*
    is ``None``, title of the *ensemble* will be used as the filename, after
    white spaces in the title are replaced with underscores.  Extension is
    :file:`.ens.npz`. Upon successful completion of saving, filename is
    returned. This function makes use of :func:`numpy.savez` function."""

    if not isinstance(ensemble, Ensemble):
        raise TypeError('invalid type for ensemble, {0}'
                        .format(type(ensemble)))
    if len(ensemble) == 0:
        raise ValueError('ensemble instance does not contain data')

    dict_ = ensemble.__dict__
    attr_list = ['_title', '_confs', '_weights', '_coords']
    if isinstance(ensemble, PDBEnsemble):
        attr_list.append('_labels')
        attr_list.append('_trans')
    if filename is None:
        filename = ensemble.getTitle().replace(' ', '_')
    attr_dict = {}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value
            
    attr_dict['_atoms'] = np.array([dict_['_atoms'], 0])
    filename += '.ens.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 7
0
def saveEnsemble(ensemble, filename=None, **kwargs):
    """Save *ensemble* model data as :file:`filename.ens.npz`.  If *filename*
    is ``None``, title of the *ensemble* will be used as the filename, after
    white spaces in the title are replaced with underscores.  Extension is
    :file:`.ens.npz`. Upon successful completion of saving, filename is
    returned. This function makes use of :func:`numpy.savez` function."""

    if not isinstance(ensemble, Ensemble):
        raise TypeError('invalid type for ensemble, {0}'.format(
            type(ensemble)))
    if len(ensemble) == 0:
        raise ValueError('ensemble instance does not contain data')

    dict_ = ensemble.__dict__
    attr_list = ['_title', '_confs', '_weights', '_coords']
    if isinstance(ensemble, PDBEnsemble):
        attr_list.append('_labels')
        attr_list.append('_trans')
    if filename is None:
        filename = ensemble.getTitle().replace(' ', '_')
    attr_dict = {}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value

    attr_dict['_atoms'] = np.array([dict_['_atoms'], 0])
    filename += '.ens.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 8
0
def saveHiC(hic, filename=None, map=True, **kwargs):
    """Saves *HiC* model data as :file:`filename.hic.npz`. If *map* is **True**, 
    Hi-C contact map will not be saved and it can be loaded from raw data file 
    later. If *filename* is **None**, name of the Hi-C instance will be used as 
    the filename, after ``" "`` (white spaces) in the name are replaced with 
    ``"_"`` (underscores). Upon successful completion of saving, filename is 
    returned. This function makes use of :func:`numpy.savez` function."""

    assert isinstance(hic, HiC), 'hic must be a HiC instance.'

    if filename is None:
        filename = hic.getTitle().replace(' ', '_')

    if filename.endswith('.hic'):
        filename += '.npz'
    elif not filename.endswith('.hic.npz'):
        filename += '.hic.npz'

    attr_dict = hic.__dict__.copy()
    if not map:
        attr_dict.pop('_map')

    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()

    return filename
Ejemplo n.º 9
0
def parsePQR(filename, **kwargs):
    """Returns an :class:`.AtomGroup` containing data parsed from PDB lines.

    :arg filename: a PQR filename
    :type filename: str"""

    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))
    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            fn, ext = os.path.splitext(fn)
        title = fn.lower()
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    lines = pqr.readlines()
    pqr.close()
    LOGGER.timeit()
    ag = _parsePDBLines(ag,
                        lines,
                        split=0,
                        model=1,
                        chain=chain,
                        subset=subset,
                        altloc_torf=False,
                        format='pqr')
    if ag.numAtoms() > 0:
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                                                ag.numCoordsets() - n_csets))
        return ag
    else:
        return None
Ejemplo n.º 10
0
def parsePQR(filename, **kwargs):
    """Returns an :class:`.AtomGroup` containing data parsed from PDB lines.

    :arg filename: a PQR filename
    :type filename: str"""

    title = kwargs.get('title', kwargs.get('name'))
    model = 1
    header = False
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')
    max_n_atoms = kwargs.get('max_n_atoms', 1e5)
    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))
    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            fn, ext = os.path.splitext(fn)
        title = fn.lower()
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    lines = pqr.readlines()
    pqr.close()
    LOGGER.timeit()
    ag = _parsePDBLines(ag, lines, split=0, model=1, chain=chain,
                        subset=subset, altloc_torf=False, format='pqr', 
                        max_n_atoms=max_n_atoms)
    if ag.numAtoms() > 0:
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                      ag.numCoordsets() - n_csets))
        return ag
    else:
        return None
Ejemplo n.º 11
0
def saveEnsemble(ensemble, filename=None, **kwargs):
    """Save *ensemble* model data as :file:`filename.ens.npz`.  If *filename*
    is **None**, title of the *ensemble* will be used as the filename, after
    white spaces in the title are replaced with underscores.  Extension is
    :file:`.ens.npz`. Upon successful completion of saving, filename is
    returned. This function makes use of :func:`~numpy.savez` function."""

    if not isinstance(ensemble, Ensemble):
        raise TypeError('invalid type for ensemble, {0}'.format(
            type(ensemble)))
    if len(ensemble) == 0:
        raise ValueError('ensemble instance does not contain data')

    dict_ = ensemble.__dict__
    attr_list = ['_title', '_confs', '_weights', '_coords', '_indices']
    if isinstance(ensemble, PDBEnsemble):
        attr_list.append('_labels')
        attr_list.append('_trans')
    elif isinstance(ensemble, ClustENM):
        attr_list.extend([
            '_ph', '_cutoff', '_gamma', '_n_modes', '_n_confs', '_rmsd',
            '_n_gens', '_maxclust', '_threshold', '_sol', '_padding',
            '_ionicStrength', '_force_field', '_tolerance', '_maxIterations',
            '_sim', '_temp', '_t_steps', '_outlier', '_mzscore', '_v1',
            '_parallel', '_idx_cg', '_n_cg', '_cycle', '_time', '_targeted',
            '_tmdk'
        ])

    if filename is None:
        filename = ensemble.getTitle().replace(' ', '_')
    attr_dict = {}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value

    atoms = dict_['_atoms']
    if atoms is not None:
        attr_dict['_atoms'] = np.array([atoms, None], dtype=object)

    data = dict_['_data']
    if len(data):
        attr_dict['_data'] = np.array([data, None], dtype=object)

    if isinstance(ensemble, PDBEnsemble):
        msa = dict_['_msa']
        if msa is not None:
            attr_dict['_msa'] = np.array([msa, None], dtype=object)

    attr_dict['_type'] = ensemble.__class__.__name__

    if filename.endswith('.ens'):
        filename += '.npz'
    if not filename.endswith('.npz'):
        filename += '.ens.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 12
0
def parseEMD(emd, **kwargs):
    """Returns an :class:`.AtomGroup` containing the information parsed from EMD file. 

    This function extends :func:`.parseEMDStream`.

    See :ref:`parseEMD` for a detailed usage example. 

    :arg emd: an EMD identifier or a file name. A 4-digit EMDataBank identifier can be provided
    to download it via FTP.
    :type emd: str

    :arg cutoff: density cutoff to read EMD map. The regions with lower density than given cutoff 
    are discarded.
    :type cutoff: float or None

    :arg n_nodes: A bead based network will be constructed over provided density map. n_nodes parameter
    will show the number of beads that will fit to density map. 
    :type n_nodes: integer  

    :arg num_iter: After reading density map, coordinates are predicted with topological domain reconstruction
    method. This parameter is the total number of iterations of this algorithm: 
    :type num_iter: integer

    :arg return_map: Return the density map itself. Default is False in line with previous behaviour.
        This value is reset to True if make_nodes is False as something must be returned.
    :type return_map: bool

    :arg make_nodes: Use the topology representing network algorithm to fit pseudoatom nodes to the map.
        Default is False and sets return_map to True.
    :type make_nodes: bool
    """

    title = kwargs.get('title', None)
    if not os.path.isfile(emd):
        if len(emd) == 4 and emd.isdigit():
            if title is None:
                title = emd
                kwargs['title'] = title

            if os.path.isfile(emd + '.map'):
                filename = emd + '.map'
            elif os.path.isfile(emd + '.map.gz'):
                filename = emd + '.map.gz'
            else:
                filename = fetchPDB(emd, report=True, format='emd',compressed=False)
                if filename is None:
                    raise IOError('EMD map file for {0} could not be downloaded.'
                                  .format(emd))
            emd = filename
        else:   
            raise IOError('EMD file {0} is not available in the directory {1}'
                           .format(emd, os.getcwd()))
    if title is None:
        kwargs['title'], ext = os.path.splitext(os.path.split(emd)[1])

    emdStream = openFile(emd, 'rb')
    result = parseEMDStream(emdStream, **kwargs)
    emdStream.close()
    return result
Ejemplo n.º 13
0
def parseMMCIF(pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a :class:`.StarDict` containing header data
    parsed from an mmCIF file. If not found, the mmCIF file will be downloaded
    from the PDB. It will be downloaded in uncompressed format regardless of
    the compressed keyword.

    This function extends :func:`.parseMMCIFStream`.

    :arg pdb: a PDB identifier or a filename
        If needed, mmCIF files are downloaded using :func:`.fetchPDB()` function.
    :type pdb: str

    :arg chain: comma separated string or list-like of chain IDs
    :type chain: str, tuple, list, :class:`~numpy.ndarray`
    """
    chain = kwargs.pop('chain', None)
    title = kwargs.get('title', None)
    if not os.path.isfile(pdb):
        if len(pdb) == 5 and pdb.isalnum():
            if chain is None:
                chain = pdb[-1]
                pdb = pdb[:4]
            else:
                raise ValueError('Please provide chain as a keyword argument '
                                 'or part of the PDB ID, not both')
        else:
            chain = chain

        if len(pdb) == 4 and pdb.isalnum():
            if title is None:
                title = pdb
                kwargs['title'] = title

            if os.path.isfile(pdb + '.cif'):
                filename = pdb + '.cif'
            elif os.path.isfile(pdb + '.cif.gz'):
                filename = pdb + '.cif.gz'
            else:
                filename = fetchPDB(pdb, report=True,
                                    format='cif', compressed=False)
                if filename is None:
                    raise IOError('mmCIF file for {0} could not be downloaded.'
                                  .format(pdb))
            pdb = filename
        else:
            raise IOError('{0} is not a valid filename or a valid PDB '
                          'identifier.'.format(pdb))
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    cif = openFile(pdb, 'rt')
    result = parseMMCIFStream(cif, chain=chain, **kwargs)
    cif.close()
    return result
Ejemplo n.º 14
0
def writePSF(filename, atoms):
    """Write atoms in X-PLOR format PSF file with name *filename* and return
    *filename*.  This function will write available atom and bond information
    only."""

    try:
        n_atoms, segments, rnums, rnames, names, types, charges, masses = (
        atoms.numAtoms(), atoms._getSegnames(), atoms._getResnums(),
        atoms._getResnames(), atoms._getNames(), atoms._getTypes(),
        atoms._getCharges(), atoms._getMasses())

    except AttributeError:
        raise TypeError('atoms must be an Atomic instance')

    if segments is None:
        segments = atoms._getChids()
        if segments is None:
            segments = ['UNK'] * n_atoms

    if rnums is None:
        rnums = ones(n_atoms, int)

    if rnames is None:
        rnames = ['UNK'] * n_atoms

    if names is None:
        raise ValueError('atom names are not set')

    if types is None:
        atomtypes = zeros(n_atoms, array(['a']).dtype.char + '1')

    long_fields = array([len(tp) for tp in types]).max() > 4

    out = openFile(filename, 'w')
    write = out.write
    write('PSF{0}\n'.format( ' NAMD' if long_fields else ''))
    write('\n')
    write('{0:8d} !NTITLE\n'.format(1))
    write(' REMARKS {0}\n'.format(str(atoms)))
    write('\n')
    write('{0:8d} !NATOM\n'.format(n_atoms))

    for i in range(n_atoms):
        write(PSFLINE % (i + 1, segments[i], rnums[i], rnames[i], names[i],
                        types[i], charges[i], masses[i], 0))
    bonds = list(atoms._iterBonds())
    if bonds:
        bonds = array(bonds, int) + 1
        write('\n')
        write('{0:8d} !NBOND: bonds\n'.format(len(bonds)))
        for i, bond in enumerate(bonds):
            write('%8s%8s' % (bond[0], bond[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')
    out.close()
    return filename
Ejemplo n.º 15
0
def writePSF(filename, atoms):
    """Write atoms in X-PLOR format PSF file with name *filename* and return
    *filename*.  This function will write available atom and bond information
    only."""

    try:
        n_atoms, segments, rnums, rnames, names, types, charges, masses = (
            atoms.numAtoms(), atoms._getSegnames(), atoms._getResnums(),
            atoms._getResnames(), atoms._getNames(), atoms._getTypes(),
            atoms._getCharges(), atoms._getMasses())

    except AttributeError:
        raise TypeError('atoms must be an Atomic instance')

    if segments is None:
        segments = atoms._getChids()
        if segments is None:
            segments = ['UNK'] * n_atoms

    if rnums is None:
        rnums = ones(n_atoms, int)

    if rnames is None:
        rnames = ['UNK'] * n_atoms

    if names is None:
        raise ValueError('atom names are not set')

    if types is None:
        atomtypes = zeros(n_atoms, array(['a']).dtype.char + '1')

    long_fields = array([len(tp) for tp in types]).max() > 4

    out = openFile(filename, 'w')
    write = out.write
    write('PSF{0}\n'.format(' NAMD' if long_fields else ''))
    write('\n')
    write('{0:8d} !NTITLE\n'.format(1))
    write(' REMARKS {0}\n'.format(str(atoms)))
    write('\n')
    write('{0:8d} !NATOM\n'.format(n_atoms))

    for i in range(n_atoms):
        write(PSFLINE % (i + 1, segments[i], rnums[i], rnames[i], names[i],
                         types[i], charges[i], masses[i], 0))
    bonds = list(atoms._iterBonds())
    if bonds:
        bonds = array(bonds, int) + 1
        write('\n')
        write('{0:8d} !NBOND: bonds\n'.format(len(bonds)))
        for i, bond in enumerate(bonds):
            write('%8s%8s' % (bond[0], bond[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')
    out.close()
    return filename
Ejemplo n.º 16
0
def writePDB(filename, atoms, model=None):
    """Write *atoms* in PDB format to a file with name *filename* and return 
    *filename*.  If *filename* ends with :file:`.gz`, a compressed file will 
    be written."""
    
    out = openFile(filename, 'w')
    writePDBStream(out, atoms, model)
    out.close()
    return filename
Ejemplo n.º 17
0
def writePQR(filename, atoms, **kwargs):
    """Write *atoms* in PQR format to a file with name *filename*.  Only
    current coordinate set is written.  Returns *filename* upon success.  If
    *filename* ends with :file:`.gz`, a compressed file will be written."""

    stream = openFile(filename, 'w')
    writePQRStream(stream, atoms, **kwargs)
    stream.close()
    return filename
Ejemplo n.º 18
0
def writePQR(filename, atoms, **kwargs):
    """Write *atoms* in PQR format to a file with name *filename*.  Only
    current coordinate set is written.  Returns *filename* upon success.  If
    *filename* ends with :file:`.gz`, a compressed file will be written."""

    stream = openFile(filename, 'w')
    writePQRStream(stream, atoms, **kwargs)
    stream.close()
    return filename
Ejemplo n.º 19
0
def writeOverlapTable(filename, rows, cols):
    """Write table of overlaps (correlations) between two sets of modes to a
    file.  *rows* and *cols* are sets of normal modes, and correspond to rows
    and columns of the overlap table.  See also :func:`.printOverlapTable`."""

    assert isinstance(filename, str), 'filename must be a string'
    out = openFile(filename, 'w')
    out.write(getOverlapTable(rows, cols))
    out.close()
    return filename
Ejemplo n.º 20
0
def writeOverlapTable(filename, rows, cols):
    """Write table of overlaps (correlations) between two sets of modes to a
    file.  *rows* and *cols* are sets of normal modes, and correspond to rows
    and columns of the overlap table.  See also :func:`.printOverlapTable`."""

    assert isinstance(filename, str), 'filename must be a string'
    out = openFile(filename, 'w')
    out.write(getOverlapTable(rows, cols))
    out.close()
    return filename
Ejemplo n.º 21
0
def saveModel(nma, filename=None, matrices=False, **kwargs):
    """Save *nma* model data as :file:`filename.nma.npz`.  By default,
    eigenvalues, eigenvectors, variances, trace of covariance matrix,
    and name of the model will be saved.  If *matrices* is ``True``,
    covariance, Hessian or Kirchhoff matrices are saved too, whichever
    are available.  If *filename* is ``None``, name of the NMA instance
    will be used as the filename, after ``" "`` (white spaces) in the name
    are replaced with ``"_"`` (underscores).  Extension may differ based
    on the type of the NMA model.  For ANM models, it is :file:`.anm.npz`.
    Upon successful completion of saving, filename is returned. This
    function makes use of :func:`numpy.savez` function."""

    if not isinstance(nma, NMA):
        raise TypeError('invalid type for nma, {0}'.format(type(nma)))
    if len(nma) == 0:
        raise ValueError('nma instance does not contain data')

    dict_ = nma.__dict__
    attr_list = [
        '_title', '_trace', '_array', '_eigvals', '_vars', '_n_atoms', '_dof',
        '_n_modes'
    ]
    if filename is None:
        filename = nma.getTitle().replace(' ', '_')
    if isinstance(nma, GNMBase):
        attr_list.append('_cutoff')
        attr_list.append('_gamma')
        if matrices:
            attr_list.append('_kirchhoff')
            if isinstance(nma, ANM):
                attr_list.append('_hessian')
        if isinstance(nma, ANM):
            type_ = 'ANM'
        else:
            type_ = 'GNM'
    elif isinstance(nma, EDA):
        type_ = 'EDA'
    elif isinstance(nma, PCA):
        type_ = 'PCA'
    else:
        type_ = 'NMA'

    if matrices:
        attr_list.append('_cov')
    attr_dict = {'type': type_}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value
    filename += '.' + type_.lower() + '.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 22
0
def saveAtoms(atoms, filename=None, **kwargs):
    """Save *atoms* in ProDy internal format.  All atomic classes are accepted 
    as *atoms* argument.  This function saves user set atomic data as well.  
    Note that title of the AtomGroup instance is used as the filename when 
    *atoms* is not an AtomGroup.  To avoid overwriting an existing file with 
    the same name, specify a *filename*."""
    
    if not isinstance(atoms, Atomic):
        raise TypeError('atoms must be Atomic instance, not {0:s}'
                        .format(type(atoms)))
    if isinstance(atoms, AtomGroup):
        ag = atoms
        title = ag.getTitle()
        SKIP = SAVE_SKIP_ATOMGROUP
    else:
        ag = atoms.getAtomGroup()
        title = str(atoms)
        SKIP = SAVE_SKIP_POINTER
    
    if filename is None:
        filename = ag.getTitle().replace(' ', '_')
    filename += '.ag.npz'
    attr_dict = {'title': title}
    attr_dict['n_atoms'] = atoms.numAtoms()
    attr_dict['n_csets'] = atoms.numCoordsets()
    attr_dict['cslabels'] = atoms.getCSLabels()
    coords = atoms._getCoordsets()
    if coords is not None:
        attr_dict['coordinates'] = coords
    bonds = ag._bonds
    bmap = ag._bmap
    if bonds is not None and bmap is not None:
        if isinstance(atoms, AtomGroup):
            attr_dict['bonds'] = bonds
            attr_dict['bmap'] = bmap
            attr_dict['numbonds'] = ag._data['numbonds']
            frags = ag._data['fragindices']
            if frags is not None:
                attr_dict['fragindices'] = frags
        else:
            bonds = trimBonds(bonds, atoms._getIndices())
            attr_dict['bonds'] = bonds
            attr_dict['bmap'], attr_dict['numbonds'] = \
                evalBonds(bonds, len(atoms))
    
    for key, data in ag._data.iteritems():
        if key in SKIP:
            continue
        if data is not None:
            attr_dict[key] = data 
    ostream = openFile(filename, 'wb', **kwargs)
    savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 23
0
    def setUp(self):

        self.pref = join(TEMPDIR, 'compressed.txt')
        self.gzfn = self.pref + '.gz'
        self.text = ''.join(['some random text '] * 100)
        try:
            self.bytes = bytes(self.text, encoding='utf-8')
        except TypeError:
            self.bytes = self.text
        out = openFile(self.gzfn, 'wt')
        out.write(self.text)
        out.close()
Ejemplo n.º 24
0
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of
    the weekly clustering of protein chains in the PDB generated by blastclust.
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/

    This function will download about 10 Mb of data and save it after
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can
    be loaded using :func:`loadPDBClusters` function and be accessed
    using :func:`listPDBCluster`."""

    if sqid is not None:
        if isListLike(sqid):
            for s in sqid:
                if s not in PDB_CLUSTERS:
                    raise ValueError('sqid must be one or more of ' +
                                     PDB_CLUSTERS_SQID_STR)
            keys = list(sqid)
        else:
            if sqid not in PDB_CLUSTERS:
                raise ValueError('sqid must be one or more of ' +
                                 PDB_CLUSTERS_SQID_STR)
            keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)

    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(keys),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.'.format(x))
            continue
        else:
            out = openFile(filename + '.gz', 'w', folder=PDB_CLUSTERS_PATH)
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, label='_prody_fetchPDBClusters')
    LOGGER.finish()
    if len(keys) == count:
        LOGGER.info('All selected PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Ejemplo n.º 25
0
    def setUp(self):

        self.pref = join(TEMPDIR, 'compressed.txt')
        self.gzfn = self.pref + '.gz'
        self.text = ''.join(['some random text '] * 100)
        try:
            self.bytes = bytes(self.text, encoding='utf-8')
        except TypeError:
            self.bytes = self.text
        out = openFile(self.gzfn, 'wt')
        out.write(self.text)
        out.close()
Ejemplo n.º 26
0
def writePDB(filename, atoms, csets=None, autoext=True, **kwargs):
    """Write *atoms* in PDB format to a file with name *filename* and return
    *filename*.  If *filename* ends with :file:`.gz`, a compressed file will
    be written."""

    if not ('.pdb' in filename or '.pdb.gz' in filename or
             '.ent' in filename or '.ent.gz' in filename):
        filename += '.pdb'
    out = openFile(filename, 'wt')
    writePDBStream(out, atoms, csets, **kwargs)
    out.close()
    return filename
Ejemplo n.º 27
0
def writePDB(filename, atoms, csets=None, autoext=True, **kwargs):
    """Write *atoms* in PDB format to a file with name *filename* and return
    *filename*.  If *filename* ends with :file:`.gz`, a compressed file will
    be written."""

    if not ('.pdb' in filename or '.pdb.gz' in filename or '.ent' in filename
            or '.ent.gz' in filename):
        filename += '.pdb'
    out = openFile(filename, 'wt')
    writePDBStream(out, atoms, csets, **kwargs)
    out.close()
    return filename
Ejemplo n.º 28
0
def saveModel(nma, filename=None, matrices=False, **kwargs):
    """Save *nma* model data as :file:`filename.nma.npz`.  By default,
    eigenvalues, eigenvectors, variances, trace of covariance matrix,
    and name of the model will be saved.  If *matrices* is ``True``,
    covariance, Hessian or Kirchhoff matrices are saved too, whichever
    are available.  If *filename* is ``None``, name of the NMA instance
    will be used as the filename, after ``" "`` (white spaces) in the name
    are replaced with ``"_"`` (underscores).  Extension may differ based
    on the type of the NMA model.  For ANM models, it is :file:`.anm.npz`.
    Upon successful completion of saving, filename is returned. This
    function makes use of :func:`numpy.savez` function."""

    if not isinstance(nma, NMA):
        raise TypeError('invalid type for nma, {0}'.format(type(nma)))
    if len(nma) == 0:
        raise ValueError('nma instance does not contain data')

    dict_ = nma.__dict__
    attr_list = ['_title', '_trace', '_array', '_eigvals', '_vars', '_n_atoms',
                 '_dof', '_n_modes']
    if filename is None:
        filename = nma.getTitle().replace(' ', '_')
    if isinstance(nma, GNMBase):
        attr_list.append('_cutoff')
        attr_list.append('_gamma')
        if matrices:
            attr_list.append('_kirchhoff')
            if isinstance(nma, ANM):
                attr_list.append('_hessian')
        if isinstance(nma, ANM):
            type_ = 'ANM'
        else:
            type_ = 'GNM'
    elif isinstance(nma, EDA):
        type_ = 'EDA'
    elif isinstance(nma, PCA):
        type_ = 'PCA'
    else:
        type_ = 'NMA'

    if matrices:
        attr_list.append('_cov')
    attr_dict = {'type': type_}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value
    filename += '.' + type_.lower() + '.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 29
0
def saveModel(nma, filename=None, matrices=False, **kwargs):
    """Save *nma* model data as :file:`filename.nma.npz`.  By default, 
    eigenvalues, eigenvectors, variances, trace of covariance matrix, 
    and name of the model will be saved.  If *matrices* is ``True``, 
    covariance, Hessian or Kirchhoff matrices are saved too, whichever 
    are available.  If *filename* is ``None``, name of the NMA instance 
    will be used as the filename, after ``" "`` (white spaces) in the name 
    are replaced with ``"_"`` (underscores).  Extension may differ based 
    on the type of the NMA model.  For ANM models, it is :file:`.anm.npz`.
    Upon successful completion of saving, filename is returned. This 
    function makes use of :func:`numpy.savez` function."""

    if not isinstance(nma, NMA):
        raise TypeError("invalid type for nma, {0:s}".format(type(nma)))
    if len(nma) == 0:
        raise ValueError("nma instance does not contain data")

    dict_ = nma.__dict__
    attr_list = ["_title", "_trace", "_array", "_eigvals", "_vars", "_n_atoms", "_dof", "_n_modes"]
    if filename is None:
        filename = nma.getTitle().replace(" ", "_")
    if isinstance(nma, GNMBase):
        attr_list.append("_cutoff")
        attr_list.append("_gamma")
        if matrices:
            attr_list.append("_kirchhoff")
            if isinstance(nma, ANM):
                attr_list.append("_hessian")
        if isinstance(nma, ANM):
            type_ = "ANM"
        else:
            type_ = "GNM"
    elif isinstance(nma, EDA):
        type_ = "EDA"
    elif isinstance(nma, PCA):
        type_ = "PCA"
    else:
        type_ = "NMA"

    if matrices:
        attr_list.append("_cov")
    attr_dict = {"type": type_}
    for attr in attr_list:
        value = dict_[attr]
        if value is not None:
            attr_dict[attr] = value
    filename += "." + type_.lower() + ".npz"
    ostream = openFile(filename, "wb", **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 30
0
def parseCIF(pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from an mmCIF file. If not found, the mmCIF file will be downloaded
    from the PDB. It will be downloaded in uncompressed format regardless of
    the compressed keyword.

    This function extends :func:`.parseCIFStream`.

    See :ref:`parsecif` for a detailed usage example.

    :arg pdb: a PDB identifier or a filename
        If needed, mmCIF files are downloaded using :func:`.fetchPDB()` function.
    :type pdb: str
    """
    title = kwargs.get('title', None)
    if not os.path.isfile(pdb):
        if len(pdb) == 4 and pdb.isalnum():
            if title is None:
                title = pdb
                kwargs['title'] = title

            if os.path.isfile(pdb + '.cif'):
                filename = pdb + '.cif'
            elif os.path.isfile(pdb + '.cif.gz'):
                filename = pdb + '.cif.gz'
            else:
                filename = fetchPDB(pdb,
                                    report=True,
                                    format='cif',
                                    compressed=False)
                if filename is None:
                    raise IOError(
                        'mmCIF file for {0} could not be downloaded.'.format(
                            pdb))
            pdb = filename
        else:
            raise IOError('{0} is not a valid filename or a valid PDB '
                          'identifier.'.format(pdb))
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    cif = openFile(pdb, 'rt')
    result = parseCIFStream(cif, **kwargs)
    cif.close()
    return result
Ejemplo n.º 31
0
def saveVector(vector, filename, **kwargs):
    """Save *vector* data as :file:`filename.vec.npz`.  Upon successful
    completion of saving, filename is returned.  This function makes use
    of :func:`numpy.savez` function."""

    if not isinstance(vector, Vector):
        raise TypeError('invalid type for vector, {0}'.format(type(vector)))
    attr_dict = {}
    attr_dict['title'] = vector.getTitle()
    attr_dict['array'] = vector._getArray()
    attr_dict['is3d'] = vector.is3d()
    filename += '.vec.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 32
0
def saveVector(vector, filename, **kwargs):
    """Save *vector* data as :file:`filename.vec.npz`.  Upon successful
    completion of saving, filename is returned.  This function makes use
    of :func:`numpy.savez` function."""

    if not isinstance(vector, Vector):
        raise TypeError('invalid type for vector, {0}'.format(type(vector)))
    attr_dict = {}
    attr_dict['title'] = vector.getTitle()
    attr_dict['array'] = vector._getArray()
    attr_dict['is3d'] = vector.is3d()
    filename += '.vec.npz'
    ostream = openFile(filename, 'wb', **kwargs)
    np.savez(ostream, **attr_dict)
    ostream.close()
    return filename
Ejemplo n.º 33
0
def writePDB(filename, atoms, csets=None, autoext=True, **kwargs):
    """Write *atoms* in PDB format to a file with name *filename* and return
    *filename*.  If *filename* ends with :file:`.gz`, a compressed file will
    be written.        

    :arg renumber: whether to renumber atoms with serial indices
        Default is **True**
    :type renumber: bool
    """

    if not ('.pdb' in filename or '.pdb.gz' in filename or
             '.ent' in filename or '.ent.gz' in filename):
        filename += '.pdb'
    out = openFile(filename, 'wt')
    writePDBStream(out, atoms, csets, **kwargs)
    out.close()
    return filename
Ejemplo n.º 34
0
def writePDB(filename, atoms, csets=None, autoext=True, **kwargs):
    """Write *atoms* in PDB format to a file with name *filename* and return
    *filename*.  If *filename* ends with :file:`.gz`, a compressed file will
    be written.        

    :arg renumber: whether to renumber atoms with serial indices
        Default is **True**
    :type renumber: bool
    """

    if not ('.pdb' in filename or '.pdb.gz' in filename or '.ent' in filename
            or '.ent.gz' in filename):
        filename += '.pdb'
    out = openFile(filename, 'wt')
    writePDBStream(out, atoms, csets, **kwargs)
    out.close()
    return filename
Ejemplo n.º 35
0
def fetchPDBClusters(sqid=None):
    """Retrieve PDB sequence clusters.  PDB sequence clusters are results of 
    the weekly clustering of protein chains in the PDB generated by blastclust. 
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/
    
    This function will download about 10 Mb of data and save it after 
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can 
    be loaded using :func:`loadPDBClusters` function and be accessed 
    using :func:`listPDBCluster`."""
    
    if sqid is not None:
        if sqid not in PDB_CLUSTERS:
            raise ValueError('sqid must be one of ' + PDB_CLUSTERS_SQID_STR)
        keys = [sqid]
    else:
        keys = list(PDB_CLUSTERS)
    
    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(PDB_CLUSTERS),
                    '_prody_fetchPDBClusters')
    count = 0
    for i, x in enumerate(keys):
        filename = 'bc-{0}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = openURL(url)
        except IOError:
            LOGGER.warning('Clusters at {0}% sequence identity level could '
                           'not be downloaded.')
            continue
        else:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH) 
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i, '_prody_fetchPDBClusters')
    LOGGER.clear()
    if len(PDB_CLUSTERS) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warn('PDB clusters could not be downloaded.')
Ejemplo n.º 36
0
def parseCIF(pdb, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from an mmCIF file. If not found, the mmCIF file will be downloaded
    from the PDB. It will be downloaded in uncompressed format regardless of
    the compressed keyword.

    This function extends :func:`.parseCIFStream`.

    See :ref:`parsecif` for a detailed usage example.

    :arg pdb: a PDB identifier or a filename
        If needed, mmCIF files are downloaded using :func:`.fetchPDB()` function.
    :type pdb: str
    """
    title = kwargs.get('title', None)
    if not os.path.isfile(pdb):
        if len(pdb) == 4 and pdb.isalnum():
            if title is None:
                title = pdb
                kwargs['title'] = title

            if os.path.isfile(pdb + '.cif'):
                filename = pdb + '.cif'
            elif os.path.isfile(pdb + '.cif.gz'):
                filename = pdb + '.cif.gz'
            else:
                filename = fetchPDB(pdb, report=True, format='cif',compressed=False)
                if filename is None:
                    raise IOError('mmCIF file for {0} could not be downloaded.'
                                  .format(pdb))
            pdb = filename
        else:
            raise IOError('{0} is not a valid filename or a valid PDB '
                          'identifier.'.format(pdb))
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    cif = openFile(pdb, 'rt')
    result = parseCIFStream(cif, **kwargs)
    cif.close()
    return result
Ejemplo n.º 37
0
def saveModeEnsemble(mode_ensemble, filename=None, atoms=False, **kwargs):
    """Save *mode_ensemble* as :file:`filename.modeens.npz`.  If *filename* 
    is **None**, title of the ModeEnsemble instance will be used as the 
    filename, after ``" "`` (white spaces) in the title are replaced with 
    ``"_"`` (underscores).  Upon successful completion of saving, filename 
    is returned. This function makes use of :func:`~numpy.savez_compressed` 
    function."""

    if not isinstance(mode_ensemble, ModeEnsemble):
        raise TypeError('invalid type for mode_ensemble, {0}'.format(
            type(mode_ensemble)))
    if len(mode_ensemble) == 0:
        raise ValueError('mode_ensemble instance does not contain data')

    attr_list = ['_modesets', '_title', '_labels', '_weights', '_matched']
    attr_dict = {}

    if atoms:
        attr_list.append('_atoms')

    for attr in attr_list:
        value = getattr(mode_ensemble, attr)
        if value is not None:
            if attr == '_atoms':
                value = [value, None]
            if attr == '_modesets':
                value = list(value)
                value.append(None)
            attr_dict[attr] = value

    if filename is None:
        filename = mode_ensemble.getTitle().replace(' ', '_')

    suffix = '.modeens'
    if not filename.lower().endswith('.npz'):
        if not filename.lower().endswith(suffix):
            filename += suffix + '.npz'
        else:
            filename += '.npz'

    ostream = openFile(filename, 'wb', **kwargs)
    np.savez_compressed(ostream, **attr_dict)
    ostream.close()

    return filename
Ejemplo n.º 38
0
def parseSTAR(filename, **kwargs):
    """Returns a dictionary containing data
    parsed from a Relion STAR file.

    :arg filename: a filename
        The .star extension can be omitted.
    :type filename: str

    :arg start: line number for starting
        Default is **None**, meaning start at the beginning
    :type start: int, None

    :arg stop: line number for stopping
        Default is **None**, meaning don't stop.
    :type stop: int, None

    :arg shlex: whether to use shlex for splitting lines so as to preserve quoted substrings
        Default is **False**
    :type shlex: bool
    """
    if not os.path.isfile(filename) and not os.path.isfile(filename + '.star'):
        raise IOError('There is no file called {0}.'.format(filename))

    start = kwargs.get('start', None)
    if start is not None and not isinstance(start, Integral):
        raise TypeError('start should be an integer or None')

    stop = kwargs.get('stop', None)
    if stop is not None and not isinstance(stop, Integral):
        raise TypeError('stop should be an integer or None')

    shlex = kwargs.get('shlex', False)
    if not isinstance(shlex, bool):
        raise TypeError('shlex should be a boolean')

    starfile = openFile(filename, 'r')
    lines = starfile.readlines()
    lines = [pystr(line) for line in lines]
    starfile.close()

    parsingDict, prog = parseSTARLines(lines, **kwargs)

    return StarDict(parsingDict, prog, filename)
Ejemplo n.º 39
0
def parseEMD(emd, **kwargs):
    """Returns an :class:`.AtomGroup` containing the information parsed from EMD file. 

    This function extends :func:`.parseEMDStream`.

    See :ref:`parseEMD` for a detailed usage example. 

    :arg emd: an EMD identifier or a file name, EMD files should be locally available. 

    :arg cutoff: density cutoff to read EMD map. The regions with lower density than given cutoff 
    are discarded.
    :type cutoff: float

    :arg n_nodes: A bead based network will be constructed over provided density map. n_nodes parameter
    will show the number of beads that will fit to density map. 
    :type n_nodes: integer  

    :arg num_iter: After reading density map, coordinates are predicted with topological domain reconstruction
    method. This parameter is the total number of iterations of this algorithm: 
    :type num_iter: integer
    """

    title = kwargs.get('title', None)
    cutoff = float(kwargs.get('cutoff', 1.20))
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    if not os.path.isfile(emd):
        raise IOError('EMD file {0} is not available in the directory {1}'
                        .format(emd),os.getcwd())
    if title is None:
        title, ext = os.path.splitext(os.path.split(emd)[1])
        kwargs['title'] = title
    kwargs['cutoff'] = cutoff
    kwargs['n_nodes'] = n_nodes
    emdStream = openFile(emd, 'rb')
    result = parseEMDStream(emdStream, **kwargs)
    emdStream.close()
    return result
Ejemplo n.º 40
0
def fetchPDBClusters():
    """Downloads PDB sequence clusters.  PDB sequence clusters are results of 
    the weekly clustering of protein chains in the PDB generated by blastclust. 
    They are available at FTP site: ftp://resources.rcsb.org/sequence/clusters/
    
    This function will download about 10 Mb of data and save it after 
    compressing in your home directory in :file:`.prody/pdbclusters`.
    Compressed files will be less than 4 Mb in size.  Cluster data can 
    be loaded using :func:`loadPDBClusters` function and be accessed 
    using :func:`getPDBCluster`."""
    
    import urllib2
    PDB_CLUSTERS_PATH = os.path.join(getPackagePath(), 'pdbclusters')
    if not os.path.isdir(PDB_CLUSTERS_PATH):
        os.mkdir(PDB_CLUSTERS_PATH)
    LOGGER.progress('Downloading sequence clusters', len(PDB_CLUSTERS))
    count = 0
    for i, x in enumerate(PDB_CLUSTERS.keys()):
        filename = 'bc-{0:d}.out'.format(x)
        url = ('ftp://resources.rcsb.org/sequence/clusters/' + filename)
        try:
            inp = urllib2.urlopen(url)
        except urllib2.HTTPError:
            LOGGER.warning('Clusters at {0:d}% sequence identity level could '
                           'not be downloaded.')
            continue
        else:
            out = openFile(filename+'.gz', 'w', folder=PDB_CLUSTERS_PATH) 
            out.write(inp.read())
            inp.close()
            out.close()
            count += 1
        LOGGER.update(i)
    LOGGER.clear()
    if len(PDB_CLUSTERS) == count:
        LOGGER.info('All PDB clusters were downloaded successfully.')
    elif count == 0:
        LOGGER.warning('PDB clusters could not be downloaded.')
Ejemplo n.º 41
0
def parsePDB(pdb, **kwargs):
    """Return an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a PDB file.

    This function extends :func:`.parsePDBStream`.

    See :ref:`parsepdb` for a detailed usage example.

    :arg pdb: a PDB identifier or a filename
        If needed, PDB files are downloaded using :func:`.fetchPDB()` function.
    """
    print "tria"
    title = kwargs.get('title', None)
    if not os.path.isfile(pdb):
        if len(pdb) == 4 and pdb.isalnum():
            if title is None:
                title = pdb
                kwargs['title'] = title
            filename = fetchPDB(pdb, report=True)
            if filename is None:
                raise IOError(
                    'PDB file for {0} could not be downloaded.'.format(pdb))
            pdb = filename
        else:
            raise IOError('{0} is not a valid filename or a valid PDB '
                          'identifier.'.format(pdb))
    if title is None:
        title, ext = os.path.splitext(os.path.split(pdb)[1])
        if ext == '.gz':
            title, ext = os.path.splitext(title)
        if len(title) == 7 and title.startswith('pdb'):
            title = title[3:]
        kwargs['title'] = title
    pdb = openFile(pdb, 'rt')
    result = parsePDBStream(pdb, **kwargs)
    pdb.close()
    return result
Ejemplo n.º 42
0
def prody_fetch(*pdb, **kwargs):
    """Fetch PDB files from PDB FTP server.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg dir: target directory for saving PDB file(s), default is ``'.'``

    :arg gzip: gzip fetched files or not, default is ``False``"""

    import prody


    pdblist = pdb
    if len(pdblist) == 1 and os.path.isfile(pdblist[0]):
        from prody.utilities import openFile
        with openFile(pdblist[0]) as inp:
            for item in inp.read().strip().split():
                for pdb in item.split(','):
                    if len(pdb) == 4 and pdb.isalnum():
                        pdblist.append(pdb)

    prody.fetchPDB(*pdblist, folder=kwargs.get('folder', '.'),
                   compressed=kwargs.get('gzip', False),
                   copy=True)
Ejemplo n.º 43
0
def prody_fetch(*pdb, **kwargs):
    """Fetch PDB files from PDB FTP server.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg dir: target directory for saving PDB file(s), default is ``'.'``

    :arg gzip: gzip fetched files or not, default is ``False``"""

    import prody

    pdblist = pdb
    if len(pdblist) == 1 and os.path.isfile(pdblist[0]):
        from prody.utilities import openFile
        with openFile(pdblist[0]) as inp:
            for item in inp.read().strip().split():
                for pdb in item.split(','):
                    if len(pdb) == 4 and pdb.isalnum():
                        pdblist.append(pdb)

    prody.fetchPDB(*pdblist,
                   folder=kwargs.get('folder', '.'),
                   compressed=kwargs.get('gzip', False),
                   copy=True)
Ejemplo n.º 44
0
def parseHeatmap(heatmap, **kwargs):
    """Returns a two dimensional array and a dictionary with information parsed
    from *heatmap*, which may be an input stream or an :file:`.hm` file in VMD
    plugin Heat Mapper format."""

    try:
        readline, close = heatmap.readline, lambda: None
    except AttributeError:
        heatmap = openFile(heatmap)
        readline, close = heatmap.readline, heatmap.close

    meta = {}
    arrs = []

    line = readline()
    while line:
        if line.startswith('-'):
            label, data = line[1:].split(None, 1)
            data = data.strip()
            if data[0] == data[-1] == '"':
                data = data[1:-1]
            label = label.strip()
            try:
                meta[label] = HMTYPES[label](data)
            except KeyError:
                LOGGER.warn('Unrecognized label encountered: {0}'.format(
                    repr(label)))
                meta[label] = HMTYPES[label](data)
            except TypeError:
                LOGGER.warn('Could not parse data with label {0}.'.format(
                    repr(label)))
        else:
            arrs.append(line.rstrip())
        line = readline()
    close()
    nnums = len(meta.get('numbering', ''))
    heatmap = []
    numbers = []

    for arr in arrs:
        if nnums:
            items = arr.split(':', nnums + 1)
            numbers.append(items[:nnums])
        else:
            items = [arr]
        heatmap.append(fromstring(items[-1], float, sep=';'))

    heatmap = array(heatmap)
    if nnums:
        numbering = meta['numbering']
        try:
            numbers = array(numbers, int)
        except ValueError:
            try:
                numbers = array(numbers, float)
            except ValueError:
                LOGGER.warn('Numbering for y-axis could not be parsed.')
                numbering = []
        for i, label in enumerate(numbering):
            meta[label] = numbers[:, i].copy()

    return heatmap, meta
Ejemplo n.º 45
0
def writeHeatmap(filename, heatmap, **kwargs):
    """Returns *filename* that contains *heatmap* in Heat Mapper :file:`.hm`
    file (extension is automatically added when not found).  *filename* may
    also be an output stream.

    :arg title: title of the heatmap
    :type title: str

    :arg xlabel: x-axis lab, default is ``'unknown'``
    :type xlabel: str

    :arg ylabel: y-axis lab, default is ``'unknown'``
    :type ylabel: str

    :arg xorigin: x-axis origin, default is 0
    :type xorigin: float

    :arg xstep: x-axis step, default is 1
    :type xstep: float

    :arg min: minimum value, default is minimum in *heatmap*
    :type min: float

    :arg max: maximum value, default is maximum in *heatmap*
    :type max: float

    :arg format: number format, default is ``'%f'``
    :type format: str

    Other keyword arguments that are arrays with length equal to the y-axis
    (second dimension of heatmap) will be considered as *numbering*."""

    try:
        ndim, shape = heatmap.ndim, heatmap.shape
    except:
        raise TypeError('heatmap must be an array object')
    if ndim != 2:
        raise TypeError('heatmap must be a 2D array')

    try:
        write, close, stream = filename.write, lambda: None, filename
    except AttributeError:
        out = openFile(addext(filename, '.hm'), 'w')
        write, close, stream = out.write, out.close, out

    format = kwargs.pop('format', '%f')
    write('-min "{0}"\n'.format(kwargs.pop('min', heatmap.min())))
    write('-max "{0}"\n'.format(kwargs.pop('max', heatmap.max())))
    for label, default in [
        ('title', 'unknown'),
        ('xlabel', 'unknown'),
        ('xorigin', 0),
        ('xstep', 1),
        ('ylabel', 'unknown'),
    ]:
        write('-{0} "{1}"\n'.format(label, kwargs.pop(label, default)))

    numbering = []
    numlabels = []
    for key, val in kwargs.items():
        try:
            length = len(val)
        except TypeError:
            LOGGER.warn('Keyword argument {0} is not used.'.format(key))
        else:
            if length == shape[0]:
                numlabels.append(key)
                numbering.append(val)
    if not numbering:
        numlabels.append('unknown')
        numbering.append(arange(1, shape[0] + 1))

    write('-numbering "{0}"\n'.format(':'.join(numlabels)))

    for i, row in enumerate(heatmap):
        write(':'.join(str(nums[i]) for nums in numbering) + ':')
        row.tofile(stream, sep=';', format=format)
        write(';\n')

    close()
    return filename
Ejemplo n.º 46
0
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections."""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    line = psf.readline()
    i_line = 1
    while line:
        line = line.strip()
        if line.endswith('!NATOM'):
            n_atoms = int(line.split('!')[0])
            break
        line = psf.readline()
        i_line += 1
    if title is None:
        title = os.path.splitext(os.path.split(filename)[1])[0]
    else:
        title = str(title)
    if ag is None:
        ag = AtomGroup(title)
    else:
        if n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

    serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
    segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
    resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
    resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
    atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
    atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
    charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
    masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

    lines = psf.readlines(71 * (n_atoms + 5))
    if len(lines) < n_atoms:
        raise IOError('number of lines in PSF is less than the number of '
                      'atoms')

    for i, line in enumerate(lines):
        if i == n_atoms:
            break
        i_line += 1
        if len(line) <= 71:
            serials[i] = line[:8]
            segnames[i] = line[9:13].strip()
            resnums[i] = line[14:19]
            resnames[i] = line[19:23].strip()
            atomnames[i] = line[24:28].strip()
            atomtypes[i] = line[29:35].strip()
            charges[i] = line[35:44]
            masses[i] = line[50:60]
        else:
            items = line.split()
            serials[i] = items[0]
            segnames[i] = items[1]
            resnums[i] = items[2]
            resnames[i] = items[3]
            atomnames[i] = items[4]
            atomtypes[i] = items[5]
            charges[i] = items[6]
            masses[i] = items[7]

    i = n_atoms
    while 1:
        line = lines[i].split()
        if len(line) >= 2 and line[1] == '!NBOND:':
             n_bonds = int(line[0])
             break
        i += 1
    lines = ''.join(lines[i+1:]) + psf.read(n_bonds/4 * 71)
    array = fromstring(lines, count=n_bonds*2, dtype=int, sep=' ')
    if len(array) != n_bonds*2:
        raise IOError('number of bonds expected and parsed do not match')

    psf.close()
    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Ejemplo n.º 47
0
def parseEMD(emd, **kwargs):
    """Returns an :class:`.AtomGroup` containing the information parsed from EMD file. 

    This function extends :func:`.parseEMDStream`.

    See :ref:`parseEMD` for a detailed usage example. 

    :arg emd: an EMD identifier or a file name. A 4-digit EMDataBank identifier can be provided
    to download it via FTP.
    :type emd: str

    :arg cutoff: density cutoff to read EMD map. The regions with lower density than given cutoff 
    are discarded.
    :type cutoff: float

    :arg n_nodes: A bead based network will be constructed over provided density map. n_nodes parameter
    will show the number of beads that will fit to density map. 
    :type n_nodes: int  

    :arg num_iter: After reading density map, coordinates are predicted with topological domain reconstruction
    method. This parameter is the total number of iterations of this algorithm: 
    :type num_iter: int

    :arg map: Return the density map itself. Default is **False** in line with previous behaviour.
        This value is reset to **True** if make_nodes is **False** as something must be returned.
    :type map: bool

    :arg make_nodes: Use the topology representing network algorithm to fit pseudoatom nodes to the map.
        Default is **False** and sets map to **True**.
    :type make_nodes: bool
    """

    title = kwargs.get('title', None)
    if not os.path.isfile(emd):
        if len(emd) == 4 and emd.isdigit():
            if title is None:
                title = emd
                kwargs['title'] = title

            if os.path.isfile(emd + '.map'):
                filename = emd + '.map'
            elif os.path.isfile(emd + '.map.gz'):
                filename = emd + '.map.gz'
            else:
                filename = fetchPDB(emd, report=True, format='emd',compressed=False)
                if filename is None:
                    raise IOError('EMD map file for {0} could not be downloaded.'
                                  .format(emd))
            emd = filename
        else:   
            raise IOError('EMD file {0} is not available in the directory {1}'
                           .format(emd, os.getcwd()))
    if title is None:
        kwargs['title'], ext = os.path.splitext(os.path.split(emd)[1])

    emdStream = openFile(emd, 'rb')

    if emd.endswith('.stk'):
        result = parseSTKStream(emd, **kwargs)
    else:
        result = parseEMDStream(emdStream, **kwargs)

    emdStream.close()

    return result
Ejemplo n.º 48
0
def writePSF(filename, atoms):
    """Write atoms in X-PLOR format PSF file with name *filename* and return
    *filename*.  This function will write available atom and bond information
    only."""

    if not filename.endswith('.psf'):
        filename = filename + '.psf'

    try:
        n_atoms, segments, rnums, rnames, names, types, charges, masses = (
            atoms.numAtoms(), atoms._getSegnames(), atoms._getResnums(),
            atoms._getResnames(), atoms._getNames(), atoms._getTypes(),
            atoms._getCharges(), atoms._getMasses())

    except AttributeError:
        raise TypeError('atoms must be an Atomic instance')

    if segments is None:
        segments = atoms._getChids()
        if segments is None:
            segments = ['UNK'] * n_atoms

    if rnums is None:
        rnums = ones(n_atoms, int)

    if rnames is None:
        rnames = ['UNK'] * n_atoms

    if names is None:
        raise ValueError('atom names are not set')

    if types is None:
        types = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)

    if charges is None:
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)

    if masses is None:
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

    long_fields = array([len(tp) for tp in types]).max() > 4

    out = openFile(filename, 'w')
    write = out.write
    write('PSF{0}\n'.format(' NAMD' if long_fields else ''))
    write('\n')
    write('{0:8d} !NTITLE\n'.format(1))
    write(' REMARKS {0}\n'.format(str(atoms)))
    write('\n')
    write('{0:8d} !NATOM\n'.format(n_atoms))

    for i in range(n_atoms):
        write(PSFLINE % (i + 1, segments[i], rnums[i], rnames[i], names[i],
                         types[i], charges[i], masses[i], 0))

    bonds = list(atoms._iterBonds())
    if len(bonds) > 0:
        bonds = array(bonds, int) + 1
        write('\n')
        write('{0:8d} !NBOND: bonds\n'.format(len(bonds)))
        for i, bond in enumerate(bonds):
            write('%8s%8s' % (bond[0], bond[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')

    angles = list(atoms._iterAngles())
    if len(angles) > 0:
        angles = array(angles, int) + 1
        write('\n')
        write('{0:8d} !NTHETA: angles\n'.format(len(angles)))
        for i, angle in enumerate(angles):
            write('%8s%8s%8s' % (angle[0], angle[1], angle[2]))
            if i % 3 == 2:
                write('\n')
        if i % 3 != 2:
            write('\n')

    dihedrals = list(atoms._iterDihedrals())
    if len(dihedrals) > 0:
        dihedrals = array(dihedrals, int) + 1
        write('\n')
        write('{0:8d} !NPHI: dihedrals\n'.format(len(dihedrals)))
        for i, dihedral in enumerate(dihedrals):
            write('%8s%8s%8s%8s' %
                  (dihedral[0], dihedral[1], dihedral[2], dihedral[3]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')

    impropers = list(atoms._iterImpropers())
    if len(impropers) > 0:
        impropers = array(impropers, int) + 1
        write('\n')
        write('{0:8d} !NIMPHI: impropers\n'.format(len(impropers)))
        for i, improper in enumerate(impropers):
            write('%8s%8s%8s%8s' %
                  (improper[0], improper[1], improper[2], improper[3]))
            if i % 2 == 1:
                write('\n')
        if i % 2 != 1:
            write('\n')

    write('\n')

    donors = list(atoms._iterDonors())
    if len(donors) > 0:
        donors = array(donors, int) + 1
        write('\n')
        write('{0:8d} !NDON: donors\n'.format(len(donors)))
        for i, donor in enumerate(donors):
            write('%8s%8s' % (donor[0], donor[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')
    else:
        write('{0:8d} !NDON: donors\n'.format(0))
        write('\n')

    write('\n')

    acceptors = list(atoms._iterAcceptors())
    if len(acceptors) > 0:
        acceptors = array(acceptors, int) + 1
        write('\n')
        write('{0:8d} !NACC: acceptors\n'.format(len(acceptors)))
        for i, acceptor in enumerate(acceptors):
            write('%8s%8s' % (acceptor[0], acceptor[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')
    else:
        write('{0:8d} !NACC: acceptors\n'.format(0))
        write('\n')

    nbexclusions = list(atoms._iterNBExclusions())
    if len(nbexclusions) > 0:
        nbexclusions = array(nbexclusions, int) + 1
        write('\n')
        write('{0:8d} !NNB\n'.format(len(nbexclusions)))
        for i, nbexclusion in enumerate(nbexclusions):
            write('%8s%8s' % (nbexclusion[0], nbexclusion[1]))
            if i % 4 == 3:
                write('\n')
        if i % 4 != 3:
            write('\n')
    else:
        write('{0:8d} !NNB\n'.format(0))
        write('\n')

    crossterms = list(atoms._iterCrossterms())
    if len(crossterms) > 0:
        crossterms = array(crossterms, int) + 1
        write('\n')
        write('{0:8d} !NCRTERM: crossterms\n'.format(len(crossterms)))
        for i, crossterm in enumerate(crossterms):
            write('%8s%8s%8s%8s' %
                  (crossterm[0], crossterm[1], crossterm[2], crossterm[3]))
            if i % 2 == 1:
                write('\n')
        if i % 2 != 1:
            write('\n')

    write('\n')
    out.close()
    return filename
Ejemplo n.º 49
0
def writePQR(filename, atoms):
    """Write *atoms* in PQR format to a file with name *filename*.  Only
    current coordinate set is written.  Returns *filename* upon success.  If
    *filename* ends with :file:`.gz`, a compressed file will be written."""

    if not isinstance(atoms, Atomic):
        raise TypeError('atoms does not have a valid type')
    if isinstance(atoms, Atom):
        atoms = Selection(atoms.getAtomGroup(), [atoms.getIndex()],
                          atoms.getACSIndex(),
                          'index ' + str(atoms.getIndex()))
    stream = openFile(filename, 'w')
    n_atoms = atoms.numAtoms()
    atomnames = atoms.getNames()
    if atomnames is None:
        raise RuntimeError('atom names are not set')
    for i, an in enumerate(atomnames):
        lenan = len(an)
        if lenan < 4:
            atomnames[i] = ' ' + an
        elif lenan > 4:
            atomnames[i] = an[:4]

    s_or_u = np.array(['a']).dtype.char

    resnames = atoms._getResnames()
    if resnames is None:
        resnames = ['UNK'] * n_atoms
    resnums = atoms._getResnums()
    if resnums is None:
        resnums = np.ones(n_atoms, int)
    chainids = atoms._getChids()
    if chainids is None:
        chainids = np.zeros(n_atoms, s_or_u + '1')
    charges = atoms._getCharges()
    if charges is None:
        charges = np.zeros(n_atoms, float)
    radii = atoms._getRadii()
    if radii is None:
        radii = np.zeros(n_atoms, float)
    icodes = atoms._getIcodes()
    if icodes is None:
        icodes = np.zeros(n_atoms, s_or_u + '1')
    hetero = ['ATOM'] * n_atoms
    heteroflags = atoms._getFlags('hetatm')
    if heteroflags is None:
        heteroflags = atoms._getFlags('hetero')
    if heteroflags is not None:
        hetero = np.array(hetero, s_or_u + '6')
        hetero[heteroflags] = 'HETATM'
    altlocs = atoms._getAltlocs()
    if altlocs is None:
        altlocs = np.zeros(n_atoms, s_or_u + '1')

    format = ('{0:6s}{1:5d} {2:4s}{3:1s}' + '{4:4s}{5:1s}{6:4d}{7:1s}   ' +
              '{8:8.3f}{9:8.3f}{10:8.3f}' + '{11:8.4f}{12:7.4f}\n').format
    coords = atoms._getCoords()
    write = stream.write
    for i, xyz in enumerate(coords):
        write(
            format(hetero[i], i + 1,
                   atomnames[i], altlocs[i], resnames[i], chainids[i],
                   int(resnums[i]), icodes[i], xyz[0], xyz[1], xyz[2],
                   charges[i], radii[i]))
    write('TER\nEND')
    stream.close()
    return filename
Ejemplo n.º 50
0
def prody_anm(pdb, **kwargs):
    """Perform ANM calculations for *pdb*.

    """

    for key in DEFAULTS:
        if not key in kwargs:
            kwargs[key] = DEFAULTS[key]

    from os.path import isdir, join
    outdir = kwargs.get('outdir')
    if not isdir(outdir):
        raise IOError('{0} is not a valid path'.format(repr(outdir)))

    import numpy as np
    import prody
    LOGGER = prody.LOGGER

    selstr = kwargs.get('select')
    prefix = kwargs.get('prefix')
    cutoff = kwargs.get('cutoff')
    gamma = kwargs.get('gamma')
    nmodes = kwargs.get('nmodes')
    selstr = kwargs.get('select')
    model = kwargs.get('model')

    pdb = prody.parsePDB(pdb, model=model)
    if prefix == '_anm':
        prefix = pdb.getTitle() + '_anm'

    select = pdb.select(selstr)
    if select is None:
        LOGGER.warn('Selection {0} did not match any atoms.'
                    .format(repr(selstr)))
        return
    LOGGER.info('{0} atoms will be used for ANM calculations.'
                .format(len(select)))

    anm = prody.ANM(pdb.getTitle())
    anm.buildHessian(select, cutoff, gamma)
    anm.calcModes(nmodes)
    LOGGER.info('Writing numerical output.')
    if kwargs.get('outnpz'):
        prody.saveModel(anm, join(outdir, prefix))
    prody.writeNMD(join(outdir, prefix + '.nmd'), anm, select)

    extend = kwargs.get('extend')
    if extend:
        if extend == 'all':
            extended = prody.extendModel(anm, select, pdb)
        else:
            extended = prody.extendModel(anm, select, select | pdb.bb)
        prody.writeNMD(join(outdir, prefix + '_extended_' +
                       extend + '.nmd'), *extended)

    outall = kwargs.get('outall')
    delim = kwargs.get('numdelim')
    ext = kwargs.get('numext')
    format = kwargs.get('numformat')


    if outall or kwargs.get('outeig'):
        prody.writeArray(join(outdir, prefix + '_evectors'+ext),
                         anm.getArray(), delimiter=delim, format=format)
        prody.writeArray(join(outdir, prefix + '_evalues'+ext),
                         anm.getEigvals(), delimiter=delim, format=format)

    if outall or kwargs.get('outbeta'):
        from prody.utilities import openFile
        fout = openFile(prefix + '_beta.txt', 'w', folder=outdir)
        fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4s} {0[3]:5s} {0[4]:5s}\n'
                       .format(['C', 'RES', '####', 'Exp.', 'The.']))
        for data in zip(select.getChids(), select.getResnames(),
                        select.getResnums(), select.getBetas(),
                        prody.calcTempFactors(anm, select)):
            fout.write('{0[0]:1s} {0[1]:4s} {0[2]:4d} {0[3]:5.2f} {0[4]:5.2f}\n'
                       .format(data))
        fout.close()

    if outall or kwargs.get('outcov'):
        prody.writeArray(join(outdir, prefix + '_covariance' + ext),
                         anm.getCovariance(), delimiter=delim, format=format)

    if outall or kwargs.get('outcc') or kwargs.get('outhm'):
        cc = prody.calcCrossCorr(anm)
        if outall or kwargs.get('outcc'):
            prody.writeArray(join(outdir, prefix +
                             '_cross-correlations' + ext),
                             cc, delimiter=delim,  format=format)
        if outall or kwargs.get('outhm'):
            prody.writeHeatmap(join(outdir, prefix + '_cross-correlations.hm'),
                               cc, resnum=select.getResnums(),
                               xlabel='Residue', ylabel='Residue',
                               title=anm.getTitle() + ' cross-correlations')

    if outall or kwargs.get('hessian'):
        prody.writeArray(join(outdir, prefix + '_hessian'+ext),
                         anm.getHessian(), delimiter=delim, format=format)

    if outall or kwargs.get('kirchhoff'):
        prody.writeArray(join(outdir, prefix + '_kirchhoff'+ext),
                         anm.getKirchhoff(), delimiter=delim, format=format)

    if outall or kwargs.get('outsf'):
        prody.writeArray(join(outdir, prefix + '_sqflucts'+ext),
                         prody.calcSqFlucts(anm), delimiter=delim,
                         format=format)

    figall = kwargs.get('figall')
    cc = kwargs.get('figcc')
    sf = kwargs.get('figsf')
    bf = kwargs.get('figbeta')
    cm = kwargs.get('figcmap')


    if figall or cc or sf or bf or cm:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            LOGGER.warning('Matplotlib could not be imported. '
                           'Figures are not saved.')
        else:
            prody.SETTINGS['auto_show'] = False
            LOGGER.info('Saving graphical output.')
            format = kwargs.get('figformat')
            width = kwargs.get('figwidth')
            height = kwargs.get('figheight')
            dpi = kwargs.get('figdpi')
            format = format.lower()

            if figall or cc:
                plt.figure(figsize=(width, height))
                prody.showCrossCorr(anm)
                plt.savefig(join(outdir, prefix + '_cc.'+format),
                    dpi=dpi, format=format)
                plt.close('all')

            if figall or cm:
                plt.figure(figsize=(width, height))
                prody.showContactMap(anm)
                plt.savefig(join(outdir, prefix + '_cm.'+format),
                    dpi=dpi, format=format)
                plt.close('all')

            if figall or sf:
                plt.figure(figsize=(width, height))
                prody.showSqFlucts(anm)
                plt.savefig(join(outdir, prefix + '_sf.'+format),
                    dpi=dpi, format=format)
                plt.close('all')

            if figall or bf:
                plt.figure(figsize=(width, height))
                bexp = select.getBetas()
                bcal = prody.calcTempFactors(anm, select)
                plt.plot(bexp, label='Experimental')
                plt.plot(bcal, label=('Theoretical (R={0:.2f})'
                                        .format(np.corrcoef(bcal, bexp)[0,1])))
                plt.legend(prop={'size': 10})
                plt.xlabel('Node index')
                plt.ylabel('Experimental B-factors')
                plt.title(pdb.getTitle() + ' B-factors')
                plt.savefig(join(outdir, prefix + '_bf.'+format),
                    dpi=dpi, format=format)
                plt.close('all')
Ejemplo n.º 51
0
Archivo: pfam.py Proyecto: njekin/ProDy
def fetchPfamMSA(acc, alignment='full', compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
         ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
         or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    url = 'http://pfam.sanger.ac.uk/family/acc?id=' + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    url_flag = False

    if not re.search('(?<=PF)[0-9]{5}$', acc):
        raise ValueError('{0} is not a valid Pfam ID or Accession Code'
                         .format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError('alignment must be one of full, seed, ncbi or'
                         ' metagenomics')
    if alignment == 'ncbi' or alignment == 'metagenomics':
        url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
               alignment + '/gzipped')
        url_flag = True
        extension = '.sth'
    else:
        if not kwargs:
            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/' +
                   alignment + '/gzipped')
            url_flag = True
            extension = '.sth'
        else:
            align_format = kwargs.get('format', 'selex').lower()

            if align_format not in FORMAT_OPTIONS['format']:
                raise ValueError('alignment format must be of type selex'
                                 ' stockholm or fasta. MSF not supported')

            if align_format == SELEX:
                align_format, extension = 'pfam', '.slx'
            elif align_format == FASTA:
                extension = '.fasta'
            else:
                extension = '.sth'

            gaps = str(kwargs.get('gaps', 'dashes')).lower()
            if gaps not in FORMAT_OPTIONS['gaps']:
                raise ValueError('gaps must be of type mixed, dots, dashes, '
                                 'or None')

            inserts = kwargs.get('inserts', 'upper').lower()
            if(inserts not in FORMAT_OPTIONS['inserts']):
                raise ValueError('inserts must be of type lower or upper')

            order = kwargs.get('order', 'tree').lower()
            if order not in FORMAT_OPTIONS['order']:
                raise ValueError('order must be of type tree or alphabetical')

            url = ('http://pfam.sanger.ac.uk/family/' + acc + '/alignment/'
                   + alignment + '/format?format=' + align_format +
                   '&alnType=' + alignment + '&order=' + order[0] +
                   '&case=' + inserts[0] + '&gaps=' + gaps + '&download=1')

    response = openURL(url, timeout=int(kwargs.get('timeout', 60)))
    outname = kwargs.get('outname', None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get('folder', '.'))
    filepath = join(makePath(folder), outname + '_' + alignment + extension)
    if compressed:
        filepath = filepath + '.gz'
        if url_flag:
            f_out = open(filepath, 'wb')
        else:
            f_out = openFile(filepath, 'wb')
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            gunzip(response.read(), filepath)
        else:
            with open(filepath, 'wb') as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info('Pfam MSA for {0} is written as {1}.'
                .format(orig_acc, filepath))

    return filepath
Ejemplo n.º 52
0
def writeNMD(filename, modes, atoms):
    """Returns *filename* that contains *modes* and *atoms* data in NMD format
    described in :ref:`nmd-format`.  :file:`.nmd` extension is appended to
    filename, if it does not have an extension.

    .. note::
       #. This function skips modes with zero eigenvalues.
       #. If a :class:`.Vector` instance is given, it will be normalized
          before it is written. It's length before normalization will be
          written as the scaling factor of the vector."""

    if not isinstance(modes, (NMA, ModeSet, Mode, Vector)):
        raise TypeError('modes must be NMA, ModeSet, Mode, or Vector, '
                        'not {0}'.format(type(modes)))
    if modes.numAtoms() != atoms.numAtoms():
        raise Exception('number of atoms do not match')
    out = openFile(addext(filename, '.nmd'), 'w')

    #out.write('#!{0} -e\n'.format(VMDPATH))
    out.write('nmwiz_load {0}\n'.format(abspath(filename)))
    name = modes.getTitle()
    name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = str(atoms)
        name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = splitext(split(filename)[1])[0]
    out.write('name {0}\n'.format(name))
    try:
        coords = atoms.getCoords()
    except:
        raise ValueError('coordinates could not be retrieved from atoms')
    if coords is None:
        raise ValueError('atom coordinates are not set')

    try:
        data = atoms.getNames()
        if data is not None:
            out.write('atomnames {0}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = atoms.getResnames()
        if data is not None:
            out.write('resnames {0}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = atoms.getResnums()
        if data is not None:
            out.write('resids ')
            data.tofile(out, ' ')
            out.write('\n')
    except:
        pass
    try:
        data = atoms.getChids()
        if data is not None:
            out.write('chainids {0}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = atoms.getSegnames()
        if data is not None:
            out.write('segnames {0}\n'.format(' '.join(data)))
    except:
        pass

    try:
        data = atoms.getBetas()
        if data is not None:
            out.write('bfactors ')
            data.tofile(out, ' ', '%.2f')
            out.write('\n')
    except:
        pass

    format = '{0:.3f}'.format
    out.write('coordinates ')
    coords.tofile(out, ' ', '%.3f')
    out.write('\n')
    count = 0
    if isinstance(modes, Vector):
        out.write('mode 1 {0:.2f} '.format(abs(modes)))
        modes.getNormed()._getArray().tofile(out, ' ', '%.3f')
        out.write('\n')
        count += 1
    else:
        if isinstance(modes, Mode):
            modes = [modes]
        for mode in modes:
            if mode.getEigval() < ZERO:
                continue
            out.write('mode {0} {1:.2f} '.format(mode.getIndex() + 1,
                                                 mode.getVariance()**0.5))
            arr = mode._getArray().tofile(out, ' ', '%.3f')
            out.write('\n')
            count += 1
    if count == 0:
        LOGGER.warning('No normal mode data was written. '
                       'Given modes might have 0 eigenvalues.')
    out.close()
    return filename
Ejemplo n.º 53
0
def parseEMD(emd, **kwargs):
    """Parses an EM density map in EMD/MRC2015 format and 
    optionally returns an :class:`.AtomGroup` containing  
    beads built in the density using the TRN algorithm [_TM94]. 

    This function extends :func:`.parseEMDStream`.

    See :ref:`cryoem_analysis` for a usage example. 

    :arg emd: an EMD identifier or a file name. A 4-digit 
              EMDataBank identifier can be provided to download 
              it via FTP.
    :type emd: str

    :arg min_cutoff: minimum density cutoff to read EMD map. The regions with 
                 lower density than this cutoff are discarded.
                 This corresponds to the previous cutoff and take values from it.
    :type min_cutoff: float

    :arg max_cutoff: maximum density cutoff to read EMD map. The regions with 
                 higher density than this cutoff are discarded.
    :type max_cutoff: float

    :arg n_nodes: A bead based network will be constructed into the provided density map. 
                  This parameter will set the number of beads to fit to density map. 
                  Default is 0. Please change it to some number to run the TRN algorithm.
    :type n_nodes: int  

    :arg num_iter: After reading density map, coordinates are predicted with the 
                   topology representing network method. This parameter is the total 
                   number of iterations of this algorithm.
    :type num_iter: int

    :arg map: Return the density map itself. Default is **False** in line with previous behaviour.
        This value is reset to **True** if n_nodes is 0 or less.
    :type map: bool
    """

    title = kwargs.get('title', None)
    if not os.path.isfile(emd):
        if emd.startswith('EMD-') and len(emd[4:]) in [4, 5]:
            emd = emd[4:]

        if len(emd) in [4, 5] and emd.isdigit():
            if title is None:
                title = emd
                kwargs['title'] = title

            if os.path.isfile(emd + '.map'):
                filename = emd + '.map'
            elif os.path.isfile(emd + '.map.gz'):
                filename = emd + '.map.gz'
            else:
                filename = fetchPDB(emd,
                                    report=True,
                                    format='emd',
                                    compressed=False)
                if filename is None:
                    raise IOError(
                        'EMD map file for {0} could not be downloaded.'.format(
                            emd))
            emd = filename
        else:
            raise IOError(
                'EMD file {0} is not available in the directory {1}'.format(
                    emd, os.getcwd()))
    if title is None:
        kwargs['title'], ext = os.path.splitext(os.path.split(emd)[1])

    emdStream = openFile(emd, 'rb')
    result = parseEMDStream(emdStream, **kwargs)
    emdStream.close()

    return result
Ejemplo n.º 54
0
Archivo: pfam.py Proyecto: njekin/ProDy
def searchPfam(query, search_b=False, skip_a=False, **kwargs):
    """Return Pfam search results in a dictionary.  Matching Pfam accession
    as keys will map to evalue, alignment start and end residue positions.

    :arg query: UniProt ID, PDB identifier, protein sequence, or a sequence
        file, sequence queries must not contain without gaps and must be at
        least 16 characters long
    :type query: str

    :arg search_b: search Pfam-B families when **True**
    :type search_b: bool

    :arg skip_a: do not search Pfam-A families when **True**
    :type skip_a: bool

    :arg ga: use gathering threshold when **True**
    :type ga: bool

    :arg evalue: user specified e-value cutoff, must be smaller than 10.0
    :type evalue: float

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60
    :type timeout: int

    *query* can also be a PDB identifier, e.g. ``'1mkp'`` or ``'1mkpA'`` with
    chain identifier.  UniProt ID of the specified chain, or the first
    protein chain will be used for searching the Pfam database."""

    prefix = '{http://pfam.sanger.ac.uk/}'
    query = str(query)
    if isfile(query):
        from prody.sequence import MSAFile
        try:
            seq = next(MSAFile(query))
        except:
            with openFile(query) as inp:
                seq = ''.join(inp.read().split())
        else:
            seq = seq[0][1]
        if not seq.isalpha():
            raise ValueError('could not parse a sequence without gaps from ' +
                             query)
    else:
        seq = ''.join(query.split())

    import xml.etree.cElementTree as ET
    LOGGER.timeit('_pfam')
    timeout = int(kwargs.get('timeout', 60))
    if len(seq) >= MINSEQLEN:
        if not seq.isalpha():
            raise ValueError(repr(seq) + ' is not a valid sequence')

        urlextension = ''
        if kwargs:
            ga = int(kwargs.get('ga', 1))
            if not (ga == 1 or ga == 0):
                raise ValueError('ga must be either 0 or 1')

            evalue = kwargs.get('evalue', None)
            if evalue:
                if not float(evalue) <= 10.0:
                    raise ValueError('evalue must be a valid float < 10.0')
                urlextension = urlextension + '&evalue=' + str(evalue)
            else:
                urlextension = urlextension + '&ga=' + str(ga)

        search_b = int(bool(search_b))
        skip_a = int(bool(skip_a))
        if skip_a == 1:
            search_b = 1

        urlextension = urlextension + '&searchBs=' + str(search_b)
        urlextension = urlextension + '&skipAs='******'http://pfam.sanger.ac.uk/search/sequence?seq=' + str(seq) +
               urlextension + '&output=xml')
        LOGGER.debug('Submitted Pfam search for sequence "{0}...".'
                     .format(seq[:MINSEQLEN]))

        xml = openURL(url, timeout=timeout).read()

        try:
            root = ET.XML(xml)
        except Exception as err:
            raise ValueError('failed to parse results XML, check URL: ' + url)

        try:
            url = dictElement(root[0], prefix)['result_url']
        except (IndexError, KeyError):
            raise ValueError('failed to parse results XML, check URL: ' + url)

    else:
        if len(seq) <= 5:
            idcode = None
            from prody import parsePDBHeader
            try:
                polymers = parsePDBHeader(seq[:4], 'polymers')
            except Exception as err:
                LOGGER.warn('failed to parse header for {0} ({1})'
                            .format(seq[:4], str(err)))
            else:
                chid = seq[4:].upper()
                for poly in polymers:
                    if chid and poly.chid != chid:
                        continue
                    for dbref in poly.dbrefs:
                        if dbref.database != 'UniProt':
                            continue
                        idcode = dbref.idcode
                        LOGGER.info('UniProt ID code {0} for {1} chain '
                                    '{2} will be used.'
                                    .format(idcode, seq[:4], poly.chid))
                        break
                    if idcode is not None:
                        break
            if idcode is None:
                LOGGER.warn('A UniProt ID code for PDB {0} could not be '
                            'parsed.'.format(repr(seq)))
                url = 'http://pfam.sanger.ac.uk/protein/' + seq + '?output=xml'
            else:
                url = ('http://pfam.sanger.ac.uk/protein/' +
                       idcode + '?output=xml')

        else:
            url = 'http://pfam.sanger.ac.uk/protein/' + seq + '?output=xml'

    LOGGER.debug('Retrieving Pfam search results: ' + url)
    xml = None
    while LOGGER.timing('_pfam') < timeout:
        try:
            xml = openURL(url, timeout=timeout).read()
        except Exception:
            pass
        #else:
        #    if xml:
        #        break

    if not xml:
        raise IOError('Pfam search timed out or failed to parse results '
                      'XML, check URL: ' + url)
    else:
        LOGGER.report('Pfam search completed in %.2fs.', '_pfam')

    if xml.find(b'There was a system error on your last request.') > 0:
        LOGGER.warn('No Pfam matches found for: ' + seq)
        return None

    try:
        root = ET.XML(xml)
    except Exception as err:
        raise ValueError('failed to parse results XML, check URL: ' + url)

    if len(seq) >= MINSEQLEN:
        try:
            xml_matches = root[0][0][0][0]
        except IndexError:
            raise ValueError('failed to parse results XML, check URL: ' + url)
    else:
        results = dictElement(root[0], prefix)
        try:
            xml_matches = results['matches']
        except KeyError:
            raise ValueError('failed to parse results XML, check URL: ' + url)

    matches = dict()
    for child in xml_matches:

        try:
            accession = child.attrib['accession'][:7]
        except KeyError:
            raise ValueError('failed to parse results XML, check URL: ' + url)

        if not re.search('^P(F|B)[0-9]{5}$', accession):
            raise ValueError('{0} does not match pfam accession'
                             ' format'.format(accession))

        match = matches.setdefault(accession, dict(child.items()))
        locations = match.setdefault('locations', [])
        for loc in child:
            locations.append(dict(loc.items()))

    if len(seq) < MINSEQLEN:
        query = 'Query ' + repr(query)
    else:
        query = 'Query sequence'

    if matches:
        LOGGER.info(query + ' matched {0} Pfam families.'.format(len(matches)))
    else:
        LOGGER.info(query + ' did not match any Pfam families.')
    return matches
Ejemplo n.º 55
0
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  
    
    This function now includes the angles, dihedrals, and impropers sections
    as well as donors, acceptors and crossterms!"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    line = psf.readline()
    while line:
        line = line.strip()
        if line.endswith(b'!NATOM'):
            n_atoms = int(line.split(b'!')[0])
            break
        line = psf.readline()
    if title is None:
        title = os.path.splitext(os.path.split(filename)[1])[0]
    else:
        title = str(title)
    if ag is None:
        ag = AtomGroup(title)
    else:
        if n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

    serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
    segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
    resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
    resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
    atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
    atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
    charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
    masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

    n = 0
    n_bonds = 0
    for line in psf:
        if line.strip() == b'':
            continue
        if b'!NBOND:' in line.upper():
            items = line.split()
            n_bonds = int(items[0])
            break
        if n + 1 > n_atoms:
            continue

        if len(line) <= 71:
            serials[n] = line[:8]
            segnames[n] = line[9:13].strip()
            resnums[n] = line[14:19]
            resnames[n] = line[19:23].strip()
            atomnames[n] = line[24:28].strip()
            atomtypes[n] = line[29:35].strip()
            charges[n] = line[35:44]
            masses[n] = line[50:60]
        else:
            items = line.split()
            serials[n] = items[0]
            segnames[n] = items[1]
            resnums[n] = items[2]
            resnames[n] = items[3]
            atomnames[n] = items[4]
            atomtypes[n] = items[5]
            charges[n] = items[6]
            masses[n] = items[7]
        n += 1

    if n < n_atoms:
        raise IOError(
            'number of lines in PSF atoms block is less than the number of '
            'atoms')

    n_angles = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NTHETA' in line:
            items = line.split()
            n_angles = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    b_array = fromstring(lines, count=n_bonds * 2, dtype=int, sep=' ')
    if len(b_array) != n_bonds * 2:
        raise IOError('number of bonds expected and parsed do not match')

    n_dihedrals = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NPHI' in line:
            items = line.split()
            n_dihedrals = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    a_array = fromstring(lines, count=n_angles * 3, dtype=int, sep=' ')
    if len(a_array) != n_angles * 3:
        raise IOError('number of angles expected and parsed do not match')

    n_impropers = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NIMPHI' in line:
            items = line.split()
            n_impropers = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    d_array = fromstring(lines, count=n_dihedrals * 4, dtype=int, sep=' ')
    if len(d_array) != n_dihedrals * 4:
        raise IOError('number of dihedrals expected and parsed do not match')

    n_donors = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NDON' in line:
            items = line.split()
            n_donors = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    i_array = fromstring(lines, count=n_impropers * 4, dtype=int, sep=' ')
    if len(i_array) != n_impropers * 4:
        raise IOError('number of impropers expected and parsed do not match')

    n_acceptors = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NACC' in line:
            items = line.split()
            n_acceptors = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    do_array = fromstring(lines, count=n_donors * 2, dtype=int, sep=' ')
    if len(do_array) != n_donors * 2:
        raise IOError('number of donors expected and parsed do not match')

    n_exclusions = 0
    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!NNB' in line:
            items = line.split()
            n_exclusions = int(items[0])
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    ac_array = fromstring(lines, count=n_acceptors * 2, dtype=int, sep=' ')
    if len(ac_array) != n_acceptors * 2:
        raise IOError('number of acceptors expected and parsed do not match')

    lines = []
    for i, line in enumerate(psf):
        if line.strip() == b'':
            continue
        if b'!' in line:
            break
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    nbe_array = fromstring(lines, count=n_exclusions * 2, dtype=int, sep=' ')
    if len(nbe_array) != n_exclusions * 2:
        raise IOError(
            'number of nonbonded exclusions expected and parsed do not match')

    n_crossterms = 0
    for i, line in enumerate(psf):
        if b'!NCRTERM' in line:
            items = line.split()
            n_crossterms = int(items[0])
            break

    lines = []
    for i, line in enumerate(psf):
        lines.append(line.decode(encoding='UTF-8'))

    lines = ''.join(lines)
    c_array = fromstring(lines, count=n_crossterms * 4, dtype=int, sep=' ')
    if len(c_array) != n_crossterms * 4:
        raise IOError('number of crossterms expected and parsed do not match')

    psf.close()
    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    if n_bonds > 0:
        b_array = add(b_array, -1, b_array)
        ag.setBonds(b_array.reshape((n_bonds, 2)))

    if n_angles > 0:
        a_array = add(a_array, -1, a_array)
        ag.setAngles(a_array.reshape((n_angles, 3)))

    if n_dihedrals > 0:
        d_array = add(d_array, -1, d_array)
        ag.setDihedrals(d_array.reshape((n_dihedrals, 4)))

    if n_impropers > 0:
        i_array = add(i_array, -1, i_array)
        ag.setImpropers(i_array.reshape((n_impropers, 4)))

    if n_donors > 0:
        do_array = add(do_array, -1, do_array)
        ag.setDonors(do_array.reshape((n_donors, 2)))

    if n_acceptors > 0:
        ac_array = add(ac_array, -1, ac_array)
        ag.setAcceptors(ac_array.reshape((n_acceptors, 2)))

    if n_exclusions > 0:
        nbe_array = add(nbe_array, -1, nbe_array)
        ag.setNBExclusions(nbe_array.reshape((n_exclusions, 2)))

    if n_crossterms > 0:
        c_array = add(c_array, -1, c_array)
        ag.setCrossterms(c_array.reshape((n_crossterms, 4)))

    return ag
Ejemplo n.º 56
0
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        inp = openFile(cci)
        xml = inp.read()
        inp.close()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        else:
            path = None
        #url = ('http://ligand-expo.rcsb.org/reports/{0[0]}/{0}/{0}'
        #       '.xml'.format(cci.upper()))
        url = 'http://www.pdb.org/pdb/files/ligand/{0}.xml'.format(cci.upper())
        if not xml:
            #'http://www.pdb.org/pdb/files/ligand/{0}.xml'
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                out = openFile(filename, mode='w', folder=folder)
                out.write(xml)
                out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    import xml.etree.cElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}')+1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    dict_['formula_weight'] = float(dict_.get('formula_weight'))

    identifiers_and_descriptors = []
    results = root.find(ns + 'pdbx_chem_comp_identifierCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    results = root.find(ns + 'pdbx_chem_comp_descriptorCategory')
    if results:
        identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                       for audit in
                       list(root.find(ns + 'pdbx_chem_comp_auditCategory'))]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    leaving_atom_flags = np.zeros(n_atoms, np.bool)
    aromatic_flags = np.zeros(n_atoms, np.bool)
    stereo_configs = np.zeros(n_atoms, np.bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in list(atom)])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_atomatic_flag') == 'Y'
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    for bond in list(root.find(ns + 'chem_comp_bondCategory') or bonds):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
Ejemplo n.º 57
0
    def __init__(self, msa, mode='r', format=None, aligned=True, **kwargs):
        """*msa* may be a filename or a stream.  Multiple sequence alignments
        can be read from or written in FASTA (:file:`.fasta`), Stockholm
        (:file:`.sth`), or SELEX (:file:`.slx`) *format*.  For spesified
        extensions, *format* argument is not needed.  If *aligned* is
        **True**, unaligned sequences in the file or stream will cause an
        :exc:`IOError` exception.  *filter*, a function that returns a
        boolean, can be used for filtering sequences, see :meth:`setFilter`
        for details.  *slice* can be used to slice sequences, and is applied
        after filtering, see :meth:`setSlice` for details."""

        if mode[0] not in 'rwa':
            raise ValueError("mode string must be one of 'r', 'w', or 'a', "
                             "not {0}".format(repr(mode)))

        if 'b' in mode:
            mode = mode.replace('b', '')
        if 't' not in mode:
            mode += 't'

        self._format = None
        if format is not None:
            try:
                self._format = format = MSAFORMATS[format.lower()]
            except AttributeError:
                raise TypeError('format argument must be a string')
            except KeyError:
                raise ValueError('format argument is not recognized')

        self._filename = filename = None
        if mode.startswith('r'):
            try:
                torf = isfile(msa)
            except:
                pass
            else:
                if torf:
                    self._filename = filename = msa
        else:
            try:
                msa.lower, msa.strip
            except AttributeError:
                pass
            else:
                self._filename = filename = msa

        if filename is not None:
            self._filename = filename
            title, ext = splitext(split(filename)[1])
            if ext.lower() == '.gz':
                title, ext = splitext(split(title)[1])
            if format is None:
                try:
                    self._format = format = MSAEXTMAP[ext.lower()]
                except KeyError:
                    raise TypeError('format is not specified and could not be '
                                    'determined from file extension')
            self._title = title
            self._stream = openFile(msa, mode)

        else:
            if self._format is None:
                raise ValueError('format must be specified when msa is a '
                                 'stream')
            self._stream = msa
            self._title = 'stream'
            try:
                closed = self._stream.closed
            except AttributeError:
                closed = self._stream.myfileobj.closed
            if closed:
                raise ValueError('msa stream must not be closed')


        self._lenseq = None
        self._closed = False
        self._readline = None
        self._aligned = bool(aligned)

        if mode.startswith('r'):
            self._readline = self._stream.readline
            try:
                self._readlines = self._stream.readlines
            except AttributeError:
                pass

            self.setFilter(kwargs.get('filter', None),
                           kwargs.get('filter_full', False))
            self.setSlice(kwargs.get('slice', None))
            self._iter = self._itermap[format](self)
        else:
            try:
                self._write = write = self._stream.write
            except AttributeError:
                raise TypeError('msa must be a filename or a stream with '
                                'write method')
            if mode.startswith('w') and format == STOCKHOLM:
                write('# STOCKHOLM 1.0\n')
            if format.startswith('S'):
                self._selex_line = '{0:' + str(LEN_SELEX_LABEL) + 's} {1}\n'

        self._mode = mode
Ejemplo n.º 58
0
def fetchPfamMSA(acc, alignment="full", compressed=False, **kwargs):
    """Return a path to the downloaded Pfam MSA file.

    :arg acc: Pfam ID or Accession Code
    :type acc: str

    :arg alignment: alignment type, one of ``'full'`` (default), ``'seed'``,
         ``'ncbi'``, ``'metagenomics'``, ``'rp15'``, ``'rp35'``, ``'rp55'``,
         or ``'rp75'`` where rp stands for representative proteomes

    :arg compressed: gzip the downloaded MSA file, default is **False**

    *Alignment Options*

    :arg format: a Pfam supported MSA file format, one of ``'selex'``,
        (default), ``'stockholm'`` or ``'fasta'``

    :arg order: ordering of sequences, ``'tree'`` (default) or
        ``'alphabetical'``

    :arg inserts: letter case for inserts, ``'upper'`` (default) or ``'lower'``

    :arg gaps: gap character, one of ``'dashes'`` (default), ``'dots'``,
        ``'mixed'`` or **None** for unaligned

    *Other Options*

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60

    :arg outname: out filename, default is input ``'acc_alignment.format'``

    :arg folder: output folder, default is ``'.'``"""

    url = "http://pfam.sanger.ac.uk/family/acc?id=" + acc
    handle = openURL(url)
    orig_acc = acc
    acc = handle.readline().strip()
    if PY3K:
        acc = acc.decode()
    url_flag = False

    if not re.search("(?<=PF)[0-9]{5}$", acc):
        raise ValueError("{0} is not a valid Pfam ID or Accession Code".format(repr(orig_acc)))

    if alignment not in DOWNLOAD_FORMATS:
        raise ValueError("alignment must be one of full, seed, ncbi or" " metagenomics")
    if alignment == "ncbi" or alignment == "metagenomics":
        url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
        url_flag = True
        extension = ".sth"
    else:
        if not kwargs:
            url = "http://pfam.sanger.ac.uk/family/" + acc + "/alignment/" + alignment + "/gzipped"
            url_flag = True
            extension = ".sth"
        else:
            align_format = kwargs.get("format", "selex").lower()

            if align_format not in FORMAT_OPTIONS["format"]:
                raise ValueError("alignment format must be of type selex" " stockholm or fasta. MSF not supported")

            if align_format == SELEX:
                align_format, extension = "pfam", ".slx"
            elif align_format == FASTA:
                extension = ".fasta"
            else:
                extension = ".sth"

            gaps = str(kwargs.get("gaps", "dashes")).lower()
            if gaps not in FORMAT_OPTIONS["gaps"]:
                raise ValueError("gaps must be of type mixed, dots, dashes, " "or None")

            inserts = kwargs.get("inserts", "upper").lower()
            if inserts not in FORMAT_OPTIONS["inserts"]:
                raise ValueError("inserts must be of type lower or upper")

            order = kwargs.get("order", "tree").lower()
            if order not in FORMAT_OPTIONS["order"]:
                raise ValueError("order must be of type tree or alphabetical")

            url = (
                "http://pfam.sanger.ac.uk/family/"
                + acc
                + "/alignment/"
                + alignment
                + "/format?format="
                + align_format
                + "&alnType="
                + alignment
                + "&order="
                + order[0]
                + "&case="
                + inserts[0]
                + "&gaps="
                + gaps
                + "&download=1"
            )

    response = openURL(url, timeout=int(kwargs.get("timeout", 60)))
    outname = kwargs.get("outname", None)
    if not outname:
        outname = orig_acc
    folder = str(kwargs.get("folder", "."))
    filepath = join(makePath(folder), outname + "_" + alignment + extension)
    if compressed:
        filepath = filepath + ".gz"
        if url_flag:
            f_out = open(filepath, "wb")
        else:
            f_out = openFile(filepath, "wb")
        f_out.write(response.read())
        f_out.close()
    else:
        if url_flag:
            gunzip(response.read(), filepath)
        else:
            with open(filepath, "wb") as f_out:
                f_out.write(response.read())

    filepath = relpath(filepath)
    LOGGER.info("Pfam MSA for {0} is written as {1}.".format(orig_acc, filepath))

    return filepath
Ejemplo n.º 59
0
def searchPfam(query, **kwargs):
    """Return Pfam search results in a dictionary.  Matching Pfam accession
    as keys will map to evalue, alignment start and end residue positions.

    :arg query: UniProt ID, PDB identifier, protein sequence, or a sequence
        file, sequence queries must not contain without gaps and must be at
        least 16 characters long
    :type query: str

    :arg timeout: timeout for blocking connection attempt in seconds, default
        is 60
    :type timeout: int

    *query* can also be a PDB identifier, e.g. ``'1mkp'`` or ``'1mkpA'`` with
    chain identifier.  UniProt ID of the specified chain, or the first
    protein chain will be used for searching the Pfam database."""

    prefix = "{http://pfam.xfam.org/}"
    query = str(query)
    if isfile(query):
        from prody.sequence import MSAFile

        try:
            seq = next(MSAFile(query))
        except:
            with openFile(query) as inp:
                seq = "".join(inp.read().split())
        else:
            seq = seq[0][1]
        if not seq.isalpha():
            raise ValueError("could not parse a sequence without gaps from " + query)
    else:
        seq = "".join(query.split())

    import xml.etree.cElementTree as ET

    LOGGER.timeit("_pfam")
    timeout = int(kwargs.get("timeout", 60))
    if len(seq) >= MINSEQLEN:
        if not seq.isalpha():
            raise ValueError(repr(seq) + " is not a valid sequence")

            fseq = ">Seq\n" + seq
            parameters = {"hmmdb": "pfam", "seq": fseq}
            enc_params = urllib.urlencode(parameters)
            request = urllib2.Request("http://hmmer.janelia.org/search/hmmscan", enc_params)

            url = urllib2.urlopen(request).geturl() + "?output=xml"
        LOGGER.debug('Submitted Pfam search for sequence "{0}...".'.format(seq[:MINSEQLEN]))

        xml = openURL(url, timeout=timeout).read()

        try:
            root = ET.XML(xml)
        except Exception as err:
            raise ValueError("failed to parse results XML, check URL: " + url)
            matches = {}
            for child in root[0]:
                if child.tag == "hits":
                    accession = child.get("acc")
                    pfam_id = accession.split(".")[0]
                    matches[pfam_id] = {}
                    matches[pfam_id]["accession"] = accession
                    matches[pfam_id]["class"] = "Domain"
                    matches[pfam_id]["id"] = child.get("name")
                    matches[pfam_id]["locations"] = {}
                    matches[pfam_id]["locations"]["ali_end"] = child[0].get("alisqto")
                    matches[pfam_id]["locations"]["ali_start"] = child[0].get("alisqfrom")
                    matches[pfam_id]["locations"]["bitscore"] = child[0].get("bitscore")
                    matches[pfam_id]["locations"]["end"] = child[0].get("alisqto")
                    matches[pfam_id]["locations"]["evalue"] = child.get("evalue")
                    matches[pfam_id]["locations"]["evidence"] = "hmmer v3.0"
                    matches[pfam_id]["locations"]["hmm_end"] = child[0].get("alihmmto")
                    matches[pfam_id]["locations"]["hmm_start"] = child[0].get("alihmmfrom")
                    matches[pfam_id]["locations"]["significant"] = child[0].get("significant")
                    matches[pfam_id]["locations"]["start"] = child[0].get("alisqfrom")
                    matches[pfam_id]["type"] = "Pfam-A"
                return matches

    else:
        if len(seq) <= 5:
            idcode = None
            from prody import parsePDBHeader

            try:
                polymers = parsePDBHeader(seq[:4], "polymers")
            except Exception as err:
                LOGGER.warn("failed to parse header for {0} ({1})".format(seq[:4], str(err)))
            else:
                chid = seq[4:].upper()
                for poly in polymers:
                    if chid and poly.chid != chid:
                        continue
                    for dbref in poly.dbrefs:
                        if dbref.database != "UniProt":
                            continue
                        idcode = dbref.idcode
                        LOGGER.info(
                            "UniProt ID code {0} for {1} chain " "{2} will be used.".format(idcode, seq[:4], poly.chid)
                        )
                        break
                    if idcode is not None:
                        break
            if idcode is None:
                LOGGER.warn("A UniProt ID code for PDB {0} could not be " "parsed.".format(repr(seq)))
                url = "http://pfam.xfam.org/protein/" + seq + "?output=xml"
            else:
                url = "http://pfam.xfam.org/protein/" + idcode + "?output=xml"

        else:
            url = "http://pfam.xfam.org/protein/" + seq + "?output=xml"

    LOGGER.debug("Retrieving Pfam search results: " + url)
    xml = None
    while LOGGER.timing("_pfam") < timeout:
        try:
            xml = openURL(url, timeout=timeout).read()
        except Exception:
            pass
        else:
            if xml:
                break

    if not xml:
        raise IOError("Pfam search timed out or failed to parse results " "XML, check URL: " + url)
    else:
        LOGGER.report("Pfam search completed in %.2fs.", "_pfam")

    if xml.find(b"There was a system error on your last request.") > 0:
        LOGGER.warn("No Pfam matches found for: " + seq)
        return None

    try:
        root = ET.XML(xml)
    except Exception as err:
        raise ValueError("failed to parse results XML, check URL: " + url)

    if len(seq) >= MINSEQLEN:
        try:
            xml_matches = root[0][0][0][0]
        except IndexError:
            raise ValueError("failed to parse results XML, check URL: " + url)
    else:
        results = dictElement(root[0], prefix)
        try:
            xml_matches = results["matches"]
        except KeyError:
            raise ValueError("failed to parse results XML, check URL: " + url)

    matches = dict()
    for child in xml_matches:

        try:
            accession = child.attrib["accession"][:7]
        except KeyError:
            raise ValueError("failed to parse results XML, check URL: " + url)

        if not re.search("^P(F|B)[0-9]{5}$", accession):
            raise ValueError("{0} does not match pfam accession" " format".format(accession))

        match = matches.setdefault(accession, dict(child.items()))
        locations = match.setdefault("locations", [])
        for loc in child:
            locations.append(dict(loc.items()))

    if len(seq) < MINSEQLEN:
        query = "Query " + repr(query)
    else:
        query = "Query sequence"

    if matches:
        LOGGER.info(query + " matched {0} Pfam families.".format(len(matches)))
    else:
        LOGGER.info(query + " did not match any Pfam families.")
    return matches
Ejemplo n.º 60
0
def writeNMD(filename, modes, atoms):
    """Writes an NMD file for given *modes* and includes applicable data from 
    *atoms*.  Returns *filename*, if file is successfully written.  NMD file 
    format is described at :ref:`nmd-format`.
    
    .. note:: 
       #. This function skips modes with zero eigenvalues.
       #. If a :class:`~.Vector` instance is given, it will be normalized 
          before it is written. It's length before normalization will be 
          written as the scaling factor of the vector."""
    
    if not isinstance(modes, (NMA, ModeSet, Mode, Vector)):
        raise TypeError('modes must be NMA, ModeSet, Mode, or Vector, '
                        'not {0:s}'.format(type(modes)))
    if modes.numAtoms() != atoms.numAtoms():
        raise Exception('number of atoms do not match')
    out = openFile(filename, 'w')
    
    #out.write('#!{0:s} -e\n'.format(VMDPATH))
    out.write('nmwiz_load {0:s}\n'.format(os.path.abspath(filename)))
    name = modes.getTitle()
    name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = str(atoms)
        name = name.replace(' ', '_').replace('.', '_')
    if not name.replace('_', '').isalnum() or len(name) > 30:
        name = os.path.splitext(os.path.split(filename)[1])[0]
    out.write('name {0:s}\n'.format(name))
    try:
        coords = atoms.getCoords()
    except:
        raise ValueError('coordinates could not be retrieved from atoms')
    if coords is None:
        raise ValueError('atom coordinates are not set')
    
    try:
        data = atoms.getNames()
        if data is not None:
            out.write('atomnames {0:s}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = atoms.getResnames()
        if data is not None:
            out.write('resnames {0:s}\n'.format(' '.join(data)))
    except:
        pass
    try:
        data = atoms.getResnums()
        if data is not None:
            out.write('resids {0:s}\n'.format(' '.join(data.astype('|S5'))))
    except:
        pass
    try:
        data = atoms.getChids()
        if data is not None:
            out.write('chainids {0:s}\n'.format(' '.join(data)))
    except:
        pass
    
    try:
        data = atoms.getBetas()
        if data is not None:
            out.write('bfactors {0:s}\n'.format(' '.join(
                            ['{0:.3f}'.format(x) for x in data.flatten()])))
    except:
        pass
    
    if coords.dtype != float:
        coords = coords.astype(float)
    
    out.write('coordinates {0:s}\n'.format(
                    ' '.join(['{0:.3f}'.format(x) for x in coords.flatten()])))
    
    count = 0
    if isinstance(modes, Vector):
        out.write('mode 1 {0:.2f} {1:s}\n'.format(abs(modes), ' '.join(
                ['{0:.3f}'.format(x) for x in modes.getNormed()._getArray()])))
        count += 1
    else:
        if isinstance(modes, Mode):
            modes = [modes]
        for mode in modes:
            if mode.getEigval() < ZERO:
                continue
            out.write('mode {0:d} {1:.2f} {2:s}\n'.format(
                       mode.getIndex()+1, mode.getVariance()**0.5, 
                       ' '.join(
                            ['{0:.3f}'.format(x) for x in mode._getArray()])))
            count += 1
    if count == 0:
        LOGGER.warning('No normal mode data was written. '
                       'Given modes might have 0 eigenvalues.')
    out.close() 
    return filename