Python AtomGroup.AtomGroup Exemples, prody.atomic.AtomGroup.AtomGroup Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : emdfile.py Projet : Python3pkg/ProDy

def parseEMDStream(stream, **kwargs):
    """Build an :class:`.AtomGroup` from the density data of an EMD stream.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keyword arguments read by this function:

    :arg cutoff: converted to float, default is 1.20
    :arg n_nodes: converted to int, default is 1000
    :arg num_iter: converted to int, default is 20
    :arg title: title of the new atom group, default is ``'Unknown'``
    :arg ag: an existing :class:`.AtomGroup` to parse the data into

    :raises TypeError: when *ag* is given but is not an :class:`.AtomGroup`
    """

    cutoff = float(kwargs.get('cutoff', 1.20))
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))

    if 'ag' not in kwargs:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')))
        csets_before = 0
    else:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        # remembered so that only newly added coordinate sets are reported
        csets_before = ag.numCoordsets()

    LOGGER.warn('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()
    _parseEMDLines(ag, stream, cutoff=cutoff, n_nodes=n_nodes,
                   num_iter=num_iter, format='EMD')

    # '%.2fs' survives str.format untouched and is left for LOGGER.report
    summary = ('{0} atoms and {1} coordinate sets were '
               'parsed in %.2fs.')
    LOGGER.report(summary.format(ag.numAtoms(),
                                 ag.numCoordsets() - csets_before))
    return ag

Exemple #2

0

Afficher le fichier

def parsePQR(filename, **kwargs):
    """Returns an :class:`.AtomGroup` containing data parsed from a PQR file,
    or **None** when no atoms were parsed.

    :arg filename: a PQR filename
    :type filename: str

    Keyword arguments read by this function:

    :arg title: title of the atom group; *name* is accepted as an alias.
        When neither is given, the lower-cased file name without its
        extension (and without a trailing ``.gz``) is used
    :arg chain: chain identifier(s) passed on to the line parser
    :type chain: str
    :arg subset: name of a subset, looked up case-insensitively in
        ``_PDBSubsets``
    :type subset: str
    :arg ag: an existing :class:`.AtomGroup` to parse the data into

    :raises IOError: when *filename* is not an existing file
    :raises TypeError: when *subset* or *chain* is not a string, or *ag* is
        not an :class:`.AtomGroup`
    :raises ValueError: when *subset* is unknown or *chain* is empty"""

    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))
    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            # strip the real extension hidden behind the compression suffix
            fn, ext = os.path.splitext(fn)
        title = fn.lower()
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        # remembered so that only newly added coordinate sets are reported
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    try:
        lines = pqr.readlines()
    finally:
        # close the handle even when reading fails
        pqr.close()

    LOGGER.timeit()
    ag = _parsePDBLines(ag,
                        lines,
                        split=0,
                        model=1,
                        chain=chain,
                        subset=subset,
                        altloc_torf=False,
                        format='pqr')
    if ag.numAtoms() > 0:
        # '%.2fs' survives str.format untouched and is left for LOGGER.report
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                                                ag.numCoordsets() - n_csets))
        return ag
    else:
        return None

Exemple #3

0

Afficher le fichier

Fichier : emdfile.py Projet : uibcdf/ProDy

def parseEMDStream(stream, **kwargs):
    """Parse an EMD stream into a density map and/or an :class:`.AtomGroup`
    of pseudoatoms, delegating the actual work to ``_parseEMDLines``.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keyword arguments read by this function:

    :arg cutoff: converted to float unless **None**, default is **None**
    :arg n_nodes: default is 0; a positive value switches *make_nodes* on
    :arg num_iter: converted to int, default is 20
    :arg map: whether the map is requested, default is **False**
    :arg make_nodes: whether pseudoatoms are fitted, default is **False**
    :arg title: title of the atom group, default is ``'Unknown'``
    :arg title_suffix: appended to the title, default is an empty string

    Returns ``(emd, atomgroup)`` when nodes are made and the map is requested,
    the atom group alone when only nodes are made, and otherwise whatever
    ``_parseEMDLines`` returns."""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = kwargs.get('n_nodes', 0)
    num_iter = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', False)
    make_nodes = kwargs.get('make_nodes', False)

    if n_nodes > 0:
        make_nodes = True
        n_nodes = int(n_nodes)

    if want_map is False and make_nodes is False:
        # nothing was requested: fall back to fitting 1000 nodes
        LOGGER.warn(
            'At least one of map and make_nodes should be True. '
            'Setting map to False was an intentional change from the default '
            'behaviour so make_nodes has been set to True with n_nodes=1000.')
        make_nodes = True
        n_nodes = 1000

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    # identical keyword set for every call into the line parser
    parser_options = dict(cutoff=cutoff, n_nodes=n_nodes, num_iter=num_iter,
                          map=want_map, make_nodes=make_nodes)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, **parser_options)

    LOGGER.info(
        'Building coordinates from electron density map. This may take a while.'
    )
    LOGGER.timeit()

    parsed = _parseEMDLines(atomgroup, stream, **parser_options)
    if want_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed

    # '%.2fs' survives str.format untouched and is left for LOGGER.report
    LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
        atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if want_map:
        return emd, atomgroup
    return atomgroup

Exemple #4

0

Afficher le fichier

def parseEMDStream(stream, **kwargs):
    """Parse an EMD stream into a density map and/or an :class:`.AtomGroup`,
    delegating the actual work to ``_parseEMDLines``.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keyword arguments read by this function:

    :arg cutoff: converted to float unless **None**, default is **None**
    :arg n_nodes: converted to int, default is 1000
    :arg num_iter: converted to int, default is 20
    :arg return_map: whether the map is requested, default is **False**
    :arg make_nodes: whether nodes are built, default is **False**
    :arg title: title of the atom group, default is ``'Unknown'``
    :arg title_suffix: appended to the title, default is an empty string

    When neither *return_map* nor *make_nodes* is set, a warning is logged and
    *return_map* is switched on.

    Returns ``(emd, atomgroup)`` when both flags are set, the atom group alone
    when only *make_nodes* is set, and otherwise whatever ``_parseEMDLines``
    returns."""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    return_map = kwargs.get('return_map', False)
    make_nodes = kwargs.get('make_nodes', False)

    if return_map is False and make_nodes is False:
        LOGGER.warn('At least one of return_map and make_nodes should be True. '
                    'Setting make_nodes to False was an intentional change from the default '
                    'so return_map has been set to True.')
        # Update the local that is actually forwarded below; writing only to
        # kwargs left the parser being called with return_map=False.
        return_map = True

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, cutoff=cutoff,
                              n_nodes=n_nodes, num_iter=num_iter,
                              return_map=return_map, make_nodes=make_nodes)

    LOGGER.info('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()

    parsed = _parseEMDLines(atomgroup, stream, cutoff=cutoff,
                            n_nodes=n_nodes, num_iter=num_iter,
                            return_map=return_map, make_nodes=make_nodes)
    if return_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed

    # '%.2fs' survives str.format untouched and is left for LOGGER.report
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if return_map:
        return emd, atomgroup
    return atomgroup

Exemple #5

0

Afficher le fichier

Fichier : get_geom_dev.py Projet : corredD/mesoscope

def readMolStr(app, lines, filename, model=None, chain=None, subset=None,
               altloc='A', format="PDB", header=False, secondary=None):
    """Parse PDB-style *lines* into a ``Molecule`` and register it with *app*.

    The atom group title (also used as the molecule name) is the base name of
    *filename* without its extension.  *model*, *chain*, *subset*, *altloc*
    and *format* are forwarded to ProDy's ``_parsePDBLines``; *header* and
    *secondary* are accepted but not used by this function.

    Bonds are built by distance and default radii are assigned before the
    molecule is added to *app*.  Secondary structure assignment and
    biomolecule building are not performed here.

    Returns the created ``Molecule``.
    """
    from prody.proteins.header import getHeaderDict
    from prody.proteins.pdbfile import _parsePDBLines
    from prody.atomic import AtomGroup
    from MolKit2.molecule import Molecule
    import os

    basename = os.path.split(filename)[1]
    title, _extension = os.path.splitext(basename)
    ag = AtomGroup(title)

    # only the split index of the header parse is needed by the line parser
    _header, split = getHeaderDict(lines)
    _parsePDBLines(ag, lines, split, model, chain, subset, altloc,
                   format=format)

    mol = Molecule(title, ag, filename=filename)
    mol.buildBondsByDistance()
    mol.defaultRadii()
    app.addMolecule(mol, group=None, filename=title)
    return mol

Exemple #6

0

Afficher le fichier

Fichier : psffile.py Projet : nffaruk/ProDy

def _readPSFSection(psf, next_marker):
    """Collect the lines of one PSF section up to the next section header.

    Reads from *psf* (opened in binary mode) until a line containing
    *next_marker* is met, skipping blank lines.  Returns ``(text, count)``:
    the lines read before the header, decoded as UTF-8 and concatenated, and
    the integer that starts the header line (0 when no header is found)."""

    chunks = []
    count = 0
    for line in psf:
        if line.strip() == b'':
            continue
        if next_marker in line:
            count = int(line.split()[0])
            break
        chunks.append(line.decode(encoding='UTF-8'))
    return ''.join(chunks), count


def _parsePSFIndices(text, n_items, width, label):
    """Parse ``n_items * width`` whitespace separated integers from *text*.

    :raises IOError: when a different number of integers is obtained;
        *label* names the section in the error message"""

    expected = n_items * width
    indices = fromstring(text, count=expected, dtype=int, sep=' ')
    if len(indices) != expected:
        raise IOError('number of {0} expected and parsed do not match'
                      .format(label))
    return indices


def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the base name of *filename* without its
    extension will be set as the title of the :class:`.AtomGroup` instance.
    An :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as
    the file.  Data from PSF file will be added to the *ag*.  This may
    overwrite present data if it overlaps with PSF file content.

    This function now includes the angles, dihedrals, and impropers sections
    as well as donors, acceptors and crossterms!

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` header is missing, the atoms block is
        shorter than announced, or a section does not hold the announced
        number of indices"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    try:
        # locate the atoms header, which starts with the number of atoms
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b'!NATOM'):
                n_atoms = int(line.split(b'!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM section was not found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b'':
                continue
            if b'!NBOND:' in line.upper():
                n_bonds = int(line.split()[0])
                break
            if n + 1 > n_atoms:
                # surplus lines after the announced number of atoms are skipped
                continue

            if len(line) <= 71:
                # short lines are read by fixed columns
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # longer lines are read as whitespace separated fields
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError(
                'number of lines in PSF atoms block is less than the number of '
                'atoms')

        # Each section's count sits on its own header line, so reading the
        # body of one section also yields the count of the next one.
        text, n_angles = _readPSFSection(psf, b'!NTHETA')
        b_array = _parsePSFIndices(text, n_bonds, 2, 'bonds')

        text, n_dihedrals = _readPSFSection(psf, b'!NPHI')
        a_array = _parsePSFIndices(text, n_angles, 3, 'angles')

        text, n_impropers = _readPSFSection(psf, b'!NIMPHI')
        d_array = _parsePSFIndices(text, n_dihedrals, 4, 'dihedrals')

        text, n_donors = _readPSFSection(psf, b'!NDON')
        i_array = _parsePSFIndices(text, n_impropers, 4, 'impropers')

        text, n_acceptors = _readPSFSection(psf, b'!NACC')
        do_array = _parsePSFIndices(text, n_donors, 2, 'donors')

        text, n_exclusions = _readPSFSection(psf, b'!NNB')
        ac_array = _parsePSFIndices(text, n_acceptors, 2, 'acceptors')

        # the exclusions list ends at the next header of any kind; that
        # header's count is not used
        lines = []
        for line in psf:
            if line.strip() == b'':
                continue
            if b'!' in line:
                break
            lines.append(line.decode(encoding='UTF-8'))
        nbe_array = _parsePSFIndices(''.join(lines), n_exclusions, 2,
                                     'nonbonded exclusions')

        # skip ahead to the crossterms header, if there is one
        n_crossterms = 0
        for line in psf:
            if b'!NCRTERM' in line:
                n_crossterms = int(line.split()[0])
                break

        # everything that remains is taken as the crossterms list
        text = ''.join([line.decode(encoding='UTF-8') for line in psf])
        c_array = _parsePSFIndices(text, n_crossterms, 4, 'crossterms')
    finally:
        # release the handle on success and on every error path
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # The setter is looked up by name only when the section is non-empty.
    topology = (
        (n_bonds, b_array, 2, 'setBonds'),
        (n_angles, a_array, 3, 'setAngles'),
        (n_dihedrals, d_array, 4, 'setDihedrals'),
        (n_impropers, i_array, 4, 'setImpropers'),
        (n_donors, do_array, 2, 'setDonors'),
        (n_acceptors, ac_array, 2, 'setAcceptors'),
        (n_exclusions, nbe_array, 2, 'setNBExclusions'),
        (n_crossterms, c_array, 4, 'setCrossterms'),
    )
    for count, indices, width, setter in topology:
        if count > 0:
            # every parsed index is decremented by one, in place
            indices = add(indices, -1, indices)
            getattr(ag, setter)(indices.reshape((count, width)))

    return ag

Exemple #7

0

Afficher le fichier

def parseNMD(filename, type=NMA):
    """Returns :class:`.NMA` and :class:`.AtomGroup` instances storing data
    parsed from *filename* in :file:`.nmd` format. Type should be :class:`.NMA`
    or a subclass such as :class:`.PCA`, :class:`.ANM`, or :class:`.GNM`.

    *type* may also be a string containing ``'ANM'``, ``'GNM'``, ``'PCA'`` or
    ``'EDA'``, or equal to ``'NMA'`` (case-insensitive).

    Returns ``(None, ag)`` when the file holds no usable mode line.

    :raises TypeError: when *type* is not :class:`.NMA` or a subclass"""

    if isinstance(type, str):
        type = type.upper().strip()
        if 'ANM' in type:
            type = ANM
        elif 'GNM' in type:
            type = GNM
        elif 'PCA' in type or 'EDA' in type:
            type = PCA
        elif type == 'NMA':
            type = NMA
        else:
            type = None
    # issubclass() raises its own, unhelpful TypeError for non-class
    # arguments (e.g. an unrecognised string mapped to None above)
    try:
        is_nma = issubclass(type, NMA)
    except TypeError:
        is_nma = False
    if not is_nma:
        raise TypeError('type must be NMA, ANM, GNM, or PCA')

    atomic = {}
    atomic.update([(label, None) for label in NMD_LABEL_MAP])
    atomic['coordinates'] = None
    atomic['name'] = None
    modes = []

    with open(filename) as nmd:
        for lineno, line in enumerate(nmd, 1):
            try:
                label, data = line.split(None, 1)
            except ValueError:
                # Blank line or a label without data: skip it instead of
                # re-processing the label/data left over from the last line.
                continue

            if label == 'mode':
                modes.append((lineno, data))
            elif label in atomic:
                if atomic[label] is None:
                    atomic[label] = (lineno, data)
                else:
                    LOGGER.warn('Data label {0} is found more than once in '
                                '{1}.'.format(repr(label), repr(filename)))

    # fall back to the file name when there is no (or an empty) name line
    name_entry = atomic.pop('name', None)
    name = name_entry[1].strip() if name_entry is not None else ''
    if not name:
        name = splitext(split(filename)[1])[0]
    ag = AtomGroup(name)
    dof = None
    n_atoms = None

    coords_entry = atomic.pop('coordinates', None)
    if coords_entry is not None:
        lineno, coords = coords_entry
        coords = np.fromstring(coords, dtype=float, sep=' ')
        if coords.shape[0] % 3 != 0:
            LOGGER.warn('Coordinate data in {0} at line {1} is corrupt '
                        'and will be omitted.'.format(repr(filename), lineno))
        else:
            dof = coords.shape[0]
            n_atoms = dof // 3
            coords = coords.reshape((n_atoms, 3))
            ag.setCoords(coords)

    from prody.atomic import ATOMIC_FIELDS

    for label, data in atomic.items():  # PY3K: OK
        if data is None:
            continue
        lineno, data = data
        data = data.split()
        if n_atoms is None:
            # the first per-atom record fixes the number of atoms
            n_atoms = len(data)
            dof = n_atoms * 3
        elif len(data) != n_atoms:
            LOGGER.warn('Data with label {0} in {1} at line {2} is '
                        'corrupt, expected {3} values, parsed {4}.'.format(
                            repr(label), repr(filename), lineno, n_atoms,
                            len(data)))
            continue
        label = NMD_LABEL_MAP[label]
        data = np.array(data, dtype=ATOMIC_FIELDS[label].dtype)
        ag.setData(label, data)

    if not modes:
        return None, ag

    length = len(modes[0][1].split())
    if dof is None:
        # no atomic data at all: derive the size from the first mode line
        dof = length - (length % 3)
    elif not length > n_atoms + 2:  # GNM
        dof = n_atoms

    array = np.zeros((dof, len(modes)))
    eigvals = []
    count = 0
    for lineno, mode in modes:
        mode = np.fromstring(mode, dtype=float, sep=' ')
        # up to two leading values (e.g. index and scaling) may precede the
        # vector components
        diff = len(mode) - dof
        if diff < 0 or diff > 2:
            LOGGER.warn('Mode data in {0} at line {1} is corrupt.'.format(
                repr(filename), lineno))
            continue
        # Fill columns contiguously so that skipped lines leave no gaps and
        # the truncation below keeps exactly the accepted modes.
        array[:, count] = mode[diff:]
        eigvals.append(mode[:diff])
        count += 1

    if count == 0:
        return None, ag

    try:
        eigvals = np.array(eigvals, dtype=float)
    except (TypeError, ValueError):
        # e.g. mode lines with differing numbers of leading values
        eigvals = None

    if (eigvals is None or eigvals.ndim != 2 or
            eigvals.shape[1] == 0 or eigvals.shape[1] > 2):
        LOGGER.warn('Failed to parse eigenvalues from {0}.'.format(
            repr(filename)))
        eigvals = None
    elif eigvals.shape[1] == 1:
        if np.all(eigvals % 1 == 0):
            # a single, all-integer leading column is rejected as eigenvalues
            LOGGER.warn('Failed to parse eigenvalues from {0}.'.format(
                repr(filename)))
            eigvals = None
        else:
            eigvals = eigvals.flatten()**2
    else:
        eigvals = eigvals[:, 1]**2

    nma = type(name)
    if eigvals is not None and type != PCA:
        eigvals = 1. / eigvals

    if count != array.shape[1]:
        array = array[:, :count].copy()

    nma.setEigens(array, eigvals)
    return nma, ag

Exemple #8

0

Afficher le fichier

Fichier : emdfile.py Projet : minghao2016/ProDy

def parseEMDStream(stream, **kwargs):
    """Read an EMD/MRC2014 data stream into an ``EMDMAP`` and, when a
    positive *n_nodes* is given, fit TRN nodes to it and return them as an
    :class:`.AtomGroup`.

    :arg stream: Any object with the method ``readlines``
                (e.g. :class:`file`, buffer, stdin)

    Keyword arguments read by this function:

    :arg cutoff: default for *min_cutoff*
    :arg min_cutoff: a number or **None**, passed to ``EMDMAP``
    :arg max_cutoff: a number or **None**, passed to ``EMDMAP``
    :arg n_nodes: number of nodes, default is 0 which makes no nodes
    :type n_nodes: int
    :arg num_iter: passed to ``TRNET.run`` as *tmax*, default is 20
    :arg map: whether the map is returned alongside the nodes
    :arg title: title of the atom group, default is ``'Unknown'``
    :arg title_suffix: appended to the title, default is an empty string

    Returns the map alone when no nodes are made, ``(atomgroup, emd)`` when
    nodes are made and *map* is true, and the atom group alone otherwise.

    :raises TypeError: when a cutoff is not a number or *n_nodes* is not
        an integer
    """
    cutoff = kwargs.get('cutoff', None)
    min_cutoff = kwargs.get('min_cutoff', cutoff)
    if min_cutoff is not None:
        if not isinstance(min_cutoff, Number):
            raise TypeError('min_cutoff should be a number or None')
        min_cutoff = float(min_cutoff)

    max_cutoff = kwargs.get('max_cutoff', None)
    if max_cutoff is not None:
        if not isinstance(max_cutoff, Number):
            raise TypeError('max_cutoff should be a number or None')
        max_cutoff = float(max_cutoff)

    n_nodes = kwargs.get('n_nodes', 0)
    num_iter = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', False)

    if not isinstance(n_nodes, int):
        raise TypeError('n_nodes should be an integer')

    make_nodes = n_nodes > 0
    if not make_nodes:
        LOGGER.info('As n_nodes is less than or equal to 0, no nodes will be'
                    ' made and the raw map will be returned')

    emd = EMDMAP(stream, min_cutoff, max_cutoff)
    if not make_nodes:
        return emd

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(
        str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    # every node carries the same name, residue name and chain identifier
    atomnames = np.zeros(n_nodes, dtype=ATOMIC_FIELDS['name'].dtype)
    atomnames[:] = 'B'
    resnames = np.zeros(n_nodes, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnames[:] = 'CGB'
    chainids = np.zeros(n_nodes, dtype=ATOMIC_FIELDS['chain'].dtype)
    chainids[:] = 'X'
    # residue numbers count the nodes from 1
    resnums = np.zeros(n_nodes, dtype=ATOMIC_FIELDS['resnum'].dtype)
    resnums[:] = np.arange(1, n_nodes + 1)

    trn = TRNET(n_nodes=n_nodes)
    trn.inputMap(emd, sample='density')
    trn.run(tmax=num_iter)

    # node positions are taken row by row from the network's W attribute
    coordinates = np.zeros((n_nodes, 3), dtype=float)
    for node in range(n_nodes):
        coordinates[node, :] = trn.W[node, :]

    atomgroup.setCoords(coordinates)
    atomgroup.setNames(atomnames)
    atomgroup.setResnames(resnames)
    atomgroup.setResnums(resnums)
    atomgroup.setChids(chainids)

    if want_map:
        return atomgroup, emd
    return atomgroup

Exemple #9

0

Afficher le fichier

Fichier : exanm.py Projet : Python3pkg/ProDy

    def buildMembrane(self, coords, **kwargs):
        """Build a lattice of membrane nodes around the given coordinate set
        and store it as an :class:`.AtomGroup` in ``self._membrane``.

        Lattice points are ``2 * r`` times integer combinations of the
        lattice vectors returned by ``assign_lpvs``.  A point is kept when it
        lies within distance *R* of the origin, strictly between
        *membrane_lo* and *membrane_hi* along z, inside the bounding box of
        *coords* (padded by ``R / 2`` in x and y), and ``checkClash`` (called
        with ``radius=5``) returns a true value for it.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_hi: the maximum z coordinate of the membrane, default
            is 13.0
        :type membrane_hi: float

        :arg membrane_lo: the minimum z coordinate of the membrane, default
            is -13.0
        :type membrane_lo: float

        :arg R: radius of the membrane, default is 80.
        :type R: float

        :arg r: half of the lattice scaling factor, default is 5.
        :type r: float

        :arg lat: lattice type which could be FCC(face-centered-cubic)(default),
            SC(simple cubic), SH(simple hexagonal)
        :type lat: str

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with ``getCoords`` method
        """
        try:
            coords = (coords._getCoords()
                      if hasattr(coords, '_getCoords') else coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        # bounding box of the structure; the appended sentinels cap the
        # values and keep min/max defined for an empty coordinate set
        pxlo = min(np.append(coords[:, 0], 10000))
        pxhi = max(np.append(coords[:, 0], -10000))
        pylo = min(np.append(coords[:, 1], 10000))
        pyhi = max(np.append(coords[:, 1], -10000))
        pzlo = min(np.append(coords[:, 2], 10000))
        pzhi = max(np.append(coords[:, 2], -10000))

        membrane_hi = float(kwargs.get('membrane_hi', 13.0))
        membrane_lo = float(kwargs.get('membrane_lo', -13.0))
        R = float(kwargs.get('R', 80))
        r = float(kwargs.get('r', 5))
        lat = str(kwargs.get('lat', 'FCC'))
        lpv = assign_lpvs(lat)

        # bounds of the integer lattice indices to scan
        thickness = membrane_hi - membrane_lo
        imax = (R + lpv[0, 2] * thickness / 2.) / r
        jmax = (R + lpv[1, 2] * thickness / 2.) / r
        kmax = (R + lpv[2, 2] * thickness / 2.) / r

        LOGGER.timeit('_membrane')
        # accepted points are gathered in a list and joined once, instead of
        # re-allocating the whole array for every new point
        nodes = []
        for i in range(-int(imax), int(imax + 1)):
            for j in range(-int(jmax), int(jmax + 1)):
                for k in range(-int(kmax), int(kmax + 1)):
                    X = zeros((1, 3))
                    for p in range(3):
                        X[0, p] = 2. * r * (i * lpv[0, p] + j * lpv[1, p] +
                                            k * lpv[2, p])
                    x, y, z = X[0, 0], X[0, 1], X[0, 2]
                    dd = x**2 + y**2 + z**2
                    if not (dd < R**2 and membrane_lo < z < membrane_hi):
                        continue
                    if not (pxlo - R / 2 < x < pxhi + R / 2 and
                            pylo - R / 2 < y < pyhi + R / 2 and
                            pzlo < z < pzhi):
                        continue
                    if checkClash(X, coords[:natoms, :], radius=5):
                        nodes.append(X)

        atm = len(nodes)
        # with no accepted point a single all-zero row is kept as coordinates
        membrane = np.concatenate(nodes, axis=0) if nodes else zeros((1, 3))

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(list(range(atm)))
        self._membrane.setResnames(["NE1" for i in range(atm)])
        self._membrane.setChids(["Q" for i in range(atm)])
        self._membrane.setElements(["Q1" for i in range(atm)])
        self._membrane.setNames(["Q1" for i in range(atm)])
        LOGGER.report('Membrane was built in %2.fs.', label='_membrane')

Exemple #10

0

Afficher le fichier

Fichier : exanm.py Projet : minghao2016/ProDy

    def buildMembrane(self, coords, **kwargs):
        """Build membrane lattice around **coords**.

        The lattice is stored as an :class:`.AtomGroup` in ``self._membrane``
        (or **None** when no node could be placed).

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_high: the maximum z coordinate of the membrane; *high*
            is accepted as an alias. Default is **13.0**
        :type membrane_high: float

        :arg membrane_low: the minimum z coordinate of the membrane; *low* is
            accepted as an alias. Default is **-13.0**
        :type membrane_low: float

        :arg h: half thickness of the membrane; when given (directly or as
            half of *depth*) the membrane spans from ``-h`` to ``h`` and the
            high/low arguments are not read
        :type h: float

        :arg R: radius of all membrane in x-y direction. Default is **80**
        :type R: float

        :arg Ri: inner radius of the membrane in x-y direction if it needs to
            be hollow. Default is **0**, which is not hollow
        :type Ri: float

        :arg r: radius of each membrane node. Default is **3.1**
        :type r: float

        :arg lat: lattice type which could be **FCC** (face-centered-cubic,
            default), **SC** (simple cubic), **SH** (simple hexagonal)
        :type lat: str

        :arg exr: exclusive radius of each protein node. Default is **5.0**
        :type exr: float

        :arg hull: whether use convex hull to determine the protein's
            interior. Turn it off if protein is multimer. Default is **True**
        :type hull: bool

        :arg center: whether transform the structure to the origin (only x-
            and y-axis). Default is **True**
        :type center: bool

        Returns the result of ``self._combineMembraneProtein`` when a
        membrane was built, otherwise the coordinates of the structure.

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with ``getCoords`` method
        :raises ValueError: when no coordinate lies between the lower and
            upper membrane bounds
        """

        atoms = coords

        try:
            if hasattr(coords, '_getCoords'):
                coords = coords._getCoords()
            else:
                coords = coords.getCoords()
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = int(coords.shape[0])

        LOGGER.timeit('_membrane')

        # membrane bounds along z: a half thickness takes precedence over
        # explicit upper/lower limits
        depth = kwargs.pop('depth', None)
        half = None if depth is None else depth / 2
        half = kwargs.pop('h', half)
        if half is None:
            upper = float(kwargs.pop('high', kwargs.pop('membrane_high', 13.0)))
            lower = float(kwargs.pop('low', kwargs.pop('membrane_low', -13.0)))
        else:
            half = float(half)
            upper = half
            lower = -half

        R = float(kwargs.pop('R', 80.))
        Ri = float(kwargs.pop('Ri', 0.))
        r = float(kwargs.pop('r', 3.1))
        lat = str(kwargs.pop('lat', 'FCC'))
        exr = float(kwargs.pop('exr', 5.))
        use_hull = kwargs.pop('hull', True)
        centering = kwargs.pop('center', True)

        V = assign_lpvs(lat)

        if centering:
            # shift in x and y only; note that this updates coords in place
            shift = coords.mean(axis=0)
            shift[-1] = 0.
            coords -= shift

        # determine transmembrane part
        z = coords[:, -1]
        in_membrane = np.logical_and(z < upper, z > lower)
        transmembrane = coords[in_membrane, :]

        if not np.any(in_membrane):
            raise ValueError(
                'No region was identified as membrane. Please use a structure from opm/ppm.'
            )

        if use_hull:
            from scipy.spatial import ConvexHull
            hull = ConvexHull(transmembrane)
        else:
            hull = transmembrane

        # determine the bound for ijk
        thickness = upper - lower
        imax = int(ceil((R + V[0, 2] * thickness / 2.) / r))
        jmax = int(ceil((R + V[1, 2] * thickness / 2.) / r))
        kmax = int(ceil((R + V[2, 2] * thickness / 2.) / r))

        nodes = []
        for i in range(-imax, imax):
            for j in range(-jmax, jmax):
                for k in range(-kmax, kmax):
                    xyz = 2. * r * dot(array([i, j, k]), V)

                    # keep points inside the slab and the bounding square
                    if not (lower < xyz[2] < upper and
                            -R < xyz[0] < R and
                            -R < xyz[1] < R):
                        continue
                    # ... and inside the ring between Ri and R
                    dd = norm(xyz[:2])
                    if not (Ri < dd < R):
                        continue
                    if checkClash(xyz, hull, radius=exr):
                        nodes.append(xyz)

        atm = len(nodes)
        membrane = array(nodes)

        if len(membrane) == 0:
            self._membrane = None
            LOGGER.warn(
                'no membrane is built. The protein should be transformed to the correct origin as in OPM'
            )
            return coords

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(range(atm))
        self._membrane.setResnames(["NE1" for i in range(atm)])
        self._membrane.setChids(["Q" for i in range(atm)])
        self._membrane.setElements(["Q1" for i in range(atm)])
        self._membrane.setNames(["Q1" for i in range(atm)])
        LOGGER.report('Membrane was built in %2.fs.', label='_membrane')

        coords = self._combineMembraneProtein(atoms)
        return coords

Exemple #11

0

Afficher le fichier

Fichier : pdbfile.py Projet : luponzo86/ProDy

def parsePDBStream(stream, **kwargs):
    """Parse atomic data and/or header data from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); an object that only implements
        ``read`` is accepted as a fallback

    Keyword arguments read by this function are *model*, *header*, *chain*,
    *subset*, *altloc*, *ag*, *title*, *biomol*, and *secondary*.

    When *model* is ``0`` no coordinates are parsed and only the header
    dictionary (**None** unless *header* is **True**) is returned.
    Otherwise the :class:`.AtomGroup` is returned, paired with the header
    dictionary when *header* is **True**.  The atom group is replaced by
    **None** when no atoms could be parsed.

    :raises TypeError: when *model*, *subset*, *chain*, or *ag* has an
        invalid type
    :raises ValueError: when *model* is negative, *subset* is unknown,
        *chain* is empty, or the stream contains no lines"""

    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'

    model = kwargs.get('model')
    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    # the suffix appended to the title records the chain/subset selection
    suffix = ''
    subset = kwargs.get('subset')
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        suffix = '_' + subset

    chain = kwargs.get('chain')
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if not chain:
            raise ValueError('chain must not be an empty string')
        suffix = chain + suffix

    altloc = kwargs.get('altloc', 'A')

    # model == 0 means "header only": coordinates are not parsed at all
    parse_atoms = model != 0

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif parse_atoms:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
        n_csets = 0

    biomol = kwargs.get('biomol', False)
    secondary = kwargs.get('secondary')
    auto_secondary = None
    if not secondary:
        # fall back to the package-wide setting when not requested explicitly
        secondary = auto_secondary = SETTINGS.get('auto_secondary')

    hd = None
    split = 0
    if parse_atoms:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                # report the original failure, not the fallback's
                raise err
        if len(lines) == 0:
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            hd, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(),
                              ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        hd, split = getHeaderDict(stream)

    if ag is not None and isinstance(hd, dict):
        if secondary:
            try:
                ag = assignSecstr(hd, ag)
            except ValueError:
                # failures are tolerated only when the assignment was
                # triggered by the automatic setting
                if not auto_secondary:
                    raise
        if biomol:
            ag = buildBiomolecules(hd, ag)

            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if not parse_atoms:
        return hd
    if header:
        return ag, hd
    return ag

Exemple #12

0

Afficher le fichier

Fichier : pdbligands.py Projet : agiorgetti1971/ProDy-1

def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, the downloaded XML file will be saved
    with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be saved
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)

    :arg cci: chemical component identifier or path to an XML file
    :type cci: str

    :arg filename: name for saving a copy of the downloaded XML file
    :type filename: str

    :raises TypeError: when *cci* is not a string
    :raises ValueError: when *cci* is neither an existing file nor a short
        alphanumeric identifier
    :raises IOError: when the XML file cannot be retrieved online"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')

    # *folder* and *xmlgz* stay None unless the local XML cache is enabled,
    # so the optional *filename* copy never refers to an unbound name
    folder = None
    xmlgz = None
    if isfile(cci):
        with openFile(cci) as inp:
            xml = inp.read()
        url = None
        path = cci
        # strip up to two extensions, e.g. "STI.xml.gz" -> "STI"
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml = None
        path = None
        cci = cci.upper()
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml = inp.read()
        url = 'http://files.rcsb.org/ligands/download/{0}.xml'.format(cci)
        if not xml:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError(
                    'XML file for ligand {0} is not found online'.format(cci))
            else:
                xml = inp.read()
                inp.close()
            if filename:
                if folder is None:
                    out = openFile(filename, mode='w')
                else:
                    out = openFile(filename, mode='w', folder=folder)
                with out:
                    out.write(xml)
            if xmlgz is not None:
                with openFile(xmlgz, 'w') as out:
                    out.write(xml)

    # cElementTree was removed from the standard library in Python 3.9;
    # ElementTree uses the C accelerator automatically when available
    import xml.etree.ElementTree as ET

    root = ET.XML(xml)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    # namespace prefix in "{uri}" form, shared by every tag in the document
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text
    # the field is optional, convert it only when the file provides it
    weight = dict_.get('formula_weight')
    if weight is not None:
        dict_['formula_weight'] = float(weight)

    identifiers_and_descriptors = []
    for category in ('pdbx_chem_comp_identifierCategory',
                     'pdbx_chem_comp_descriptorCategory'):
        results = root.find(ns + category)
        # compare with None explicitly, truth value of an Element only
        # tells whether it has children and testing it is deprecated
        if results is not None:
            identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    audits = root.find(ns + 'pdbx_chem_comp_auditCategory')
    if audits is None:
        dict_['audits'] = []
    else:
        dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                           for audit in audits]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)

    # every atom belongs to residue number 1
    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    # builtin bool, the np.bool alias was removed in NumPy 1.24
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = {child.tag[len_ns:]: child.text for child in atom}

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        # NOTE(review): kept as a comparison with 'Y'; the PDBx dictionary
        # lists R/S/N for this item, so this is presumably always False --
        # confirm the intended semantics
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model
    # the ideal structure shares all atomic data and differs in coordinates
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    bond_category = root.find(ns + 'chem_comp_bondCategory')
    for bond in ([] if bond_category is None else bond_category):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            # warn once per unknown atom name
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_

Exemple #13

0

Afficher le fichier

def parseMMCIFStream(stream, **kwargs):
    """Parse atomic data and optionally header data from a stream of CIF
    lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); an object that only implements
        ``read`` is accepted as a fallback

    Keyword arguments read by this function are *model*, *subset*, *chain*,
    *altloc*, *header*, *ag*, and *title*.

    An :class:`.AtomGroup` is returned, paired with a :class:`.StarDict`
    when *header* is requested.  The atom group is replaced by **None** when
    no atoms could be parsed.  Nothing is parsed and **None** is returned
    when *model* is ``0``.

    :raises TypeError: when *model*, *subset*, *chain*, or *ag* has an
        invalid type
    :raises ValueError: when *model* is negative, *subset* is unknown,
        *chain* is empty, or the stream contains no lines"""

    model = kwargs.get('model')
    if model is not None:
        if not isinstance(model, int):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    # the suffix appended to the title records the chain/subset selection
    suffix = ''
    subset = kwargs.get('subset')
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        suffix = '_' + subset

    chain = kwargs.get('chain')
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if not chain:
            raise ValueError('chain must not be an empty string')
        suffix = '_' + chain + suffix

    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
        n_csets = 0

    # nothing is parsed for model 0
    if model == 0:
        return None

    LOGGER.timeit()
    try:
        lines = stream.readlines()
    except AttributeError as err:
        try:
            lines = stream.read().split('\n')
        except AttributeError:
            # report the original failure, not the fallback's
            raise err
    if len(lines) == 0:
        raise ValueError('empty PDB file or stream')

    parsed = _parseMMCIFLines(ag, lines, model, chain, subset, altloc, header)
    if header:
        # with header requested the parser hands back a pair; from here on
        # *header* holds the parsed header data
        ag, header = parsed
    else:
        ag = parsed

    if ag.numAtoms() > 0:
        LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                      'parsed in %.2fs.'.format(
                          ag.numAtoms(),
                          ag.numCoordsets() - n_csets))
    else:
        ag = None
        LOGGER.warn('Atomic data could not be parsed, please '
                    'check the input file.')

    if not header:
        return ag
    return ag, StarDict(*header, title=str(kwargs.get('title', 'Unknown')))

Exemple #14

0

Afficher le fichier

Fichier : psffile.py Projet : npabon/ProDy

def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` or ``!NBOND`` section is missing,
        or when fewer atoms or bonds are present than declared"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    def readText(stream):
        """Return next line of *stream* as text, empty at end of file."""

        text = stream.readline()
        # the file is opened in binary mode, which yields bytes on Python 3
        if not isinstance(text, str):
            text = text.decode()
        return text

    psf = openFile(filename, 'rb')
    try:
        n_atoms = None
        line = readText(psf)
        while line:
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = readText(psf)
        if n_atoms is None:
            raise IOError('!NATOM section is not found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # atom records are read one line at a time, so no assumption is
        # made about how many bytes a record occupies
        for i in range(n_atoms):
            line = readText(psf)
            if not line:
                raise IOError('number of lines in PSF is less than the '
                              'number of atoms')
            if len(line) <= 71:
                # short records are sliced by fixed columns
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # longer records are split on whitespace
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        n_bonds = None
        line = readText(psf)
        while line:
            items = line.split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
            line = readText(psf)
        if n_bonds is None:
            raise IOError('!NBOND section is not found in PSF file')

        # collect atom serial pairs until enough are read, stopping at the
        # header of the next section (marked with "!") or at end of file
        n_items = n_bonds * 2
        tokens = []
        while len(tokens) < n_items:
            line = readText(psf)
            if not line or '!' in line:
                break
            tokens.extend(line.split())
        if len(tokens) < n_items:
            raise IOError('number of bonds expected and parsed do not match')
        bonds = fromstring(' '.join(tokens[:n_items]), count=n_items,
                           dtype=int, sep=' ')
        if len(bonds) != n_items:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # release the file handle on error paths as well
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # bonds are stored as zero-based indices, one less than the file values
    bonds = add(bonds, -1, bonds)
    ag.setBonds(bonds.reshape((n_bonds, 2)))

    return ag