Example #1
0
def parsePQR(filename, **kwargs):
    """Returns an :class:`.AtomGroup` containing data parsed from a PQR file.

    :arg filename: a PQR filename
    :type filename: str

    :keyword title: title of the new :class:`.AtomGroup`; *name* is accepted
        as an alias.  When neither is given, the lower-cased file name
        without its extension (and without a trailing ``.gz``) is used.
    :keyword chain: non-empty chain identifier string handed to the parser
    :keyword subset: key of ``_PDBSubsets`` (matched case-insensitively)
    :keyword ag: existing :class:`.AtomGroup` handed to the parser instead
        of a newly created one
    :keyword max_n_atoms: handed to the parser unchanged, default ``1e5``

    :returns: the :class:`.AtomGroup`, or **None** when no atom was parsed

    :raises IOError: when *filename* is not an existing file
    :raises TypeError: when *subset* or *chain* is not a string, or *ag* is
        not an :class:`.AtomGroup`
    :raises ValueError: when *subset* is unknown or *chain* is empty"""

    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    max_n_atoms = kwargs.get('max_n_atoms', 1e5)
    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))
    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            # drop the inner extension of names such as "x.pqr.gz" as well
            fn, ext = os.path.splitext(fn)
        title = fn.lower()
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    try:
        lines = pqr.readlines()
    finally:
        # release the handle even when reading fails
        pqr.close()
    LOGGER.timeit()
    ag = _parsePDBLines(ag, lines, split=0, model=1, chain=chain,
                        subset=subset, altloc_torf=False, format='pqr',
                        max_n_atoms=max_n_atoms)
    if ag.numAtoms() > 0:
        # '%.2fs' is left in the message for LOGGER.report to handle
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                      ag.numCoordsets() - n_csets))
        return ag
    else:
        return None
Example #2
0
def parsePQR(filename, **kwargs):
    """Returns an :class:`.AtomGroup` containing data parsed from a PQR file.

    :arg filename: a PQR filename
    :type filename: str

    :keyword title: title of the new :class:`.AtomGroup`; *name* is accepted
        as an alias.  When neither is given, the lower-cased file name
        without its extension (and without a trailing ``.gz``) is used.
    :keyword chain: non-empty chain identifier string handed to the parser
    :keyword subset: key of ``_PDBSubsets`` (matched case-insensitively)
    :keyword ag: existing :class:`.AtomGroup` handed to the parser instead
        of a newly created one

    :returns: the :class:`.AtomGroup`, or **None** when no atom was parsed

    :raises IOError: when *filename* is not an existing file
    :raises TypeError: when *subset* or *chain* is not a string, or *ag* is
        not an :class:`.AtomGroup`
    :raises ValueError: when *subset* is unknown or *chain* is empty"""

    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))
    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            # drop the inner extension of names such as "x.pqr.gz" as well
            fn, ext = os.path.splitext(fn)
        title = fn.lower()
    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    try:
        lines = pqr.readlines()
    finally:
        # release the handle even when reading fails
        pqr.close()
    LOGGER.timeit()
    ag = _parsePDBLines(ag,
                        lines,
                        split=0,
                        model=1,
                        chain=chain,
                        subset=subset,
                        altloc_torf=False,
                        format='pqr')
    if ag.numAtoms() > 0:
        # '%.2fs' is left in the message for LOGGER.report to handle
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                                                ag.numCoordsets() - n_csets))
        return ag
    else:
        return None
Example #3
0
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); it is handed to
        ``_parseEMDLines`` unchanged

    :keyword cutoff: converted with :func:`float`, default ``1.20``
    :keyword n_nodes: converted with :func:`int`, default ``1000``
    :keyword num_iter: converted with :func:`int`, default ``20``
    :keyword ag: existing :class:`.AtomGroup` to fill instead of a new one
    :keyword title: title of the new :class:`.AtomGroup`, default
        ``'Unknown'``; ignored when *ag* is given

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when a numeric keyword cannot be converted"""

    cutoff = float(kwargs.get('cutoff', 1.20))
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))

    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')))
        n_csets = 0

    LOGGER.warn('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()
    # the return value is ignored; `ag` itself is what gets reported and returned
    _parseEMDLines(ag, stream, cutoff=cutoff, n_nodes=n_nodes, num_iter=num_iter, format='EMD')
    # '%.2fs' is left in the message for LOGGER.report to handle
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(ag.numAtoms(),
                                            ag.numCoordsets() - n_csets))
    return ag
Example #4
0
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keywords read here are *cutoff*, *n_nodes*, *num_iter*, *map*,
    *make_nodes*, *title* and *title_suffix*.  A positive *n_nodes* switches
    *make_nodes* on.  When both *map* and *make_nodes* are **False**,
    *make_nodes* is switched on with 1000 nodes.

    The return value is ``(emd, atomgroup)`` when nodes are made and *map*
    is true, the atom group alone when only nodes are made, and otherwise
    whatever ``_parseEMDLines`` returns."""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = kwargs.get('n_nodes', 0)
    num_iter = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', False)
    make_nodes = kwargs.get('make_nodes', False)

    if n_nodes > 0:
        make_nodes, n_nodes = True, int(n_nodes)

    if want_map is False and make_nodes is False:
        LOGGER.warn(
            'At least one of map and make_nodes should be True. '
            'Setting map to False was an intentional change from the default '
            'behaviour so make_nodes has been set to True with n_nodes=1000.')
        make_nodes, n_nodes = True, 1000

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    # every branch calls the parser with exactly these keywords
    parser_kwargs = dict(cutoff=cutoff, n_nodes=n_nodes, num_iter=num_iter,
                         map=want_map, make_nodes=make_nodes)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, **parser_kwargs)

    LOGGER.info(
        'Building coordinates from electron density map. This may take a while.'
    )
    LOGGER.timeit()

    parsed = _parseEMDLines(atomgroup, stream, **parser_kwargs)
    if want_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed

    LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
        atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if want_map:
        return emd, atomgroup
    return atomgroup
Example #5
0
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    :keyword cutoff: converted with :func:`float` when given, default **None**
    :keyword n_nodes: converted with :func:`int`, default ``1000``
    :keyword num_iter: converted with :func:`int`, default ``20``
    :keyword return_map: default **False**
    :keyword make_nodes: default **False**
    :keyword title: title of the new :class:`.AtomGroup`, default ``'Unknown'``
    :keyword title_suffix: appended to the title, default ``''``

    When both *return_map* and *make_nodes* are **False**, *return_map* is
    switched on and a warning is logged.

    The return value is ``(emd, atomgroup)`` when both flags are true, the
    atom group alone when only *make_nodes* is true, and otherwise whatever
    ``_parseEMDLines`` returns."""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    return_map = kwargs.get('return_map', False)
    make_nodes = kwargs.get('make_nodes', False)

    if return_map is False and make_nodes is False:
        LOGGER.warn('At least one of return_map and make_nodes should be True. '
                    'Setting make_nodes to False was an intentional change from the default '
                    'so return_map has been set to True.')
        # Update the local that is actually forwarded to the parser; writing
        # to kwargs alone left the parser running with return_map=False.
        return_map = True

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes,
                              num_iter=num_iter, return_map=return_map,
                              make_nodes=make_nodes)

    LOGGER.info('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()

    if return_map:
        emd, atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes,
                                        num_iter=num_iter, return_map=return_map,
                                        make_nodes=make_nodes)
    else:
        atomgroup = _parseEMDLines(atomgroup, stream, cutoff=cutoff, n_nodes=n_nodes,
                                   num_iter=num_iter, return_map=return_map,
                                   make_nodes=make_nodes)
    # '%.2fs' is left in the message for LOGGER.report to handle
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if return_map:
        return emd, atomgroup
    return atomgroup
Example #6
0
def parseEMDStream(stream, **kwargs):
    """ Returns an :class:`.AtomGroup` containing EMD data parsed from a stream of EMD file.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keywords read here are *cutoff*, *n_nodes* (default 1000), *num_iter*
    (default 20), *map* (default **True**), *make_nodes* (default **False**),
    *title* and *title_suffix*.  When both *map* and *make_nodes* are
    **False**, *make_nodes* is switched on.

    The return value is ``(emd, atomgroup)`` when nodes are made and *map*
    is true, the atom group alone when only nodes are made, and otherwise
    whatever ``_parseEMDLines`` returns."""

    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)

    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', True)
    make_nodes = kwargs.get('make_nodes', False)

    if want_map is False and make_nodes is False:
        LOGGER.warn('At least one of map and make_nodes should be True. '
                    'Setting map to False was an intentional change from the default '
                    'behaviour so make_nodes has been set to True.')
        make_nodes = True

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    # every branch calls the parser with exactly these keywords
    parser_kwargs = dict(cutoff=cutoff, n_nodes=n_nodes, num_iter=num_iter,
                         map=want_map, make_nodes=make_nodes)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, **parser_kwargs)

    LOGGER.info('Building coordinates from electron density map. This may take a while.')
    LOGGER.timeit()

    parsed = _parseEMDLines(atomgroup, stream, **parser_kwargs)
    if want_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed

    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if want_map:
        return emd, atomgroup
    return atomgroup
Example #7
0
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.

    This function now includes the angles, dihedrals, and impropers sections
    as well as donors, acceptors and crossterms!

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` line is missing, the atom block is
        shorter than announced, or a section holds fewer indices than its
        header announces"""

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError('ag must be an AtomGroup instance')

    psf = openFile(filename, 'rb')
    try:
        # locate the "<count> !NATOM" header
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b'!NATOM'):
                n_atoms = int(line.split(b'!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM section could not be found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # atom records run until the "!NBOND:" header
        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b'':
                continue
            if b'!NBOND:' in line.upper():
                n_bonds = int(line.split()[0])
                break
            if n >= n_atoms:
                # surplus lines before !NBOND: are ignored
                continue

            if len(line) <= 71:
                # short lines are read by fixed column positions
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # longer lines are read as whitespace separated fields
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError(
                'number of lines in PSF atoms block is less than the number of '
                'atoms')

        # Each section body ends at the header of the NEXT section, so every
        # read returns the current body plus the next section's item count.
        text, n_angles = _readPSFSection(psf, b'!NTHETA')
        b_array = _parsePSFIndices(text, n_bonds, 2, 'bonds')

        text, n_dihedrals = _readPSFSection(psf, b'!NPHI')
        a_array = _parsePSFIndices(text, n_angles, 3, 'angles')

        text, n_impropers = _readPSFSection(psf, b'!NIMPHI')
        d_array = _parsePSFIndices(text, n_dihedrals, 4, 'dihedrals')

        text, n_donors = _readPSFSection(psf, b'!NDON')
        i_array = _parsePSFIndices(text, n_impropers, 4, 'impropers')

        text, n_acceptors = _readPSFSection(psf, b'!NACC')
        do_array = _parsePSFIndices(text, n_donors, 2, 'donors')

        text, n_exclusions = _readPSFSection(psf, b'!NNB')
        ac_array = _parsePSFIndices(text, n_acceptors, 2, 'acceptors')

        # the exclusion body stops at the next header of any kind
        text, _ = _readPSFSection(psf, b'!', parse_count=False)
        nbe_array = _parsePSFIndices(text, n_exclusions, 2,
                                     'nonbonded exclusions')

        # skip ahead to the cross-term header, then take the rest of the file
        n_crossterms = 0
        for line in psf:
            if b'!NCRTERM' in line:
                n_crossterms = int(line.split()[0])
                break
        text = ''.join([line.decode(encoding='UTF-8') for line in psf])
        c_array = _parsePSFIndices(text, n_crossterms, 4, 'crossterms')
    finally:
        # close the file on success and on every error path
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    topology = (
        ('setBonds', b_array, n_bonds, 2),
        ('setAngles', a_array, n_angles, 3),
        ('setDihedrals', d_array, n_dihedrals, 4),
        ('setImpropers', i_array, n_impropers, 4),
        ('setDonors', do_array, n_donors, 2),
        ('setAcceptors', ac_array, n_acceptors, 2),
        ('setNBExclusions', nbe_array, n_exclusions, 2),
        ('setCrossterms', c_array, n_crossterms, 4),
    )
    for setter, indices, count, width in topology:
        if count > 0:
            # subtract one in place before storing the indices
            indices = add(indices, -1, indices)
            # the setter is looked up only when there is data to store
            getattr(ag, setter)(indices.reshape((count, width)))

    return ag


def _readPSFSection(psf, marker, parse_count=True):
    """Returns ``(text, count)`` read from *psf* up to the line containing
    *marker*.  *text* joins the decoded non-blank lines that precede that
    line.  *count* is the leading integer of that line, or ``0`` when
    *parse_count* is false or the marker is never found."""

    count = 0
    body = []
    for line in psf:
        if line.strip() == b'':
            continue
        if marker in line:
            if parse_count:
                count = int(line.split()[0])
            break
        body.append(line.decode(encoding='UTF-8'))
    return ''.join(body), count


def _parsePSFIndices(text, n_items, width, label):
    """Returns ``n_items * width`` integers parsed from *text*, raising
    :exc:`IOError` mentioning *label* when fewer could be read."""

    expected = n_items * width
    indices = fromstring(text, count=expected, dtype=int, sep=' ')
    if len(indices) != expected:
        raise IOError('number of {0} expected and parsed do not match'
                      .format(label))
    return indices
Example #8
0
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` or ``!NBOND:`` line is missing, the
        atom block is shorter than announced, or fewer bond indices than
        announced could be read"""

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError('ag must be an AtomGroup instance')

    # The file is opened in binary mode, so all markers are bytes literals.
    psf = openFile(filename, 'rb')
    try:
        n_atoms = None
        for line in psf:
            line = line.strip()
            if line.endswith(b'!NATOM'):
                n_atoms = int(line.split(b'!')[0])
                break
        if n_atoms is None:
            raise IOError('!NATOM section could not be found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Read line by line instead of relying on a byte-size hint, which
        # under-reads when atom lines are longer than 71 characters.
        n = 0
        n_bonds = None
        for line in psf:
            items = line.split()
            if not items:
                continue
            if len(items) >= 2 and items[1] == b'!NBOND:':
                n_bonds = int(items[0])
                break
            if n >= n_atoms:
                # surplus lines before !NBOND: are ignored
                continue
            if len(line) <= 71:
                # short lines are read by fixed column positions
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # longer lines are read as whitespace separated fields
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')
        if n_bonds is None:
            raise IOError('!NBOND section could not be found in PSF file')

        # the bond list ends at the next section header or at end of file
        bond_lines = []
        for line in psf:
            if b'!' in line:
                break
            if line.strip():
                bond_lines.append(line.decode('UTF-8'))
        array = fromstring(''.join(bond_lines), count=n_bonds * 2,
                           dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # close the file on success and on every error path
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    if n_bonds > 0:
        # subtract one in place before storing the indices
        array = add(array, -1, array)
        ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Example #9
0
def parsePDBStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keywords read here are *model*, *header*, *chain*, *subset*, *altloc*,
    *ag*, *title*, *biomol* and *secondary*.  With ``model=0`` no atomic data
    is parsed and only the header result is returned.  Otherwise the atom
    group is returned, paired with the header when *header* is **True**."""

    model = kwargs.get('model')
    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = chain + title_suffix

    parse_atoms = model != 0

    ag = kwargs.pop('ag', None)
    if ag is None:
        if parse_atoms:
            ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
            n_csets = 0
    else:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()

    biomol = kwargs.get('biomol', False)
    auto_secondary = None
    secondary = kwargs.get('secondary')
    if not secondary:
        # fall back to the configured setting and remember that we did
        secondary = auto_secondary = SETTINGS.get('auto_secondary')

    split, hd = 0, None
    if parse_atoms:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            # objects without readlines may still offer read
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if len(lines) == 0:
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            hd, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(),
                                                    ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        hd, split = getHeaderDict(stream)

    if ag is not None and isinstance(hd, dict):
        if secondary:
            try:
                ag = assignSecstr(hd, ag)
            except ValueError:
                # only failures of the automatic assignment are tolerated
                if not auto_secondary:
                    raise
        if biomol:
            ag = buildBiomolecules(hd, ag)

            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if not parse_atoms:
        return hd
    if header:
        return ag, hd
    return ag
Example #10
0
def parsePDBStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or dictionary containing header data
    parsed from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    Keywords read here are *model*, *header*, *chain*, *subset*, *altloc*,
    *ag*, *title*, *biomol* and *secondary*.  With ``model=0`` no atomic data
    is parsed and only the header result is returned.  Otherwise the atom
    group is returned, paired with the header when *header* is **True**."""

    model = kwargs.get('model')
    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = chain + title_suffix

    parse_atoms = model != 0

    # an explicitly passed ag is always type checked, even when it is None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif parse_atoms:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0
    else:
        ag = None

    biomol = kwargs.get('biomol', False)
    auto_secondary = None
    secondary = kwargs.get('secondary')
    if not secondary:
        # fall back to the configured setting and remember that we did
        secondary = auto_secondary = SETTINGS.get('auto_secondary')

    split, hd = 0, None
    if parse_atoms:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            # objects without readlines may still offer read
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if len(lines) == 0:
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            hd, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(),
                              ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        hd, split = getHeaderDict(stream)

    if ag is not None and isinstance(hd, dict):
        if secondary:
            try:
                ag = assignSecstr(hd, ag)
            except ValueError:
                # only failures of the automatic assignment are tolerated
                if not auto_secondary:
                    raise
        if biomol:
            ag = buildBiomolecules(hd, ag)

            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if not parse_atoms:
        return hd
    if header:
        return ag, hd
    return ag
Example #11
0
def parseMMCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` and/or a :class:`.StarDict`
    containing header data parsed from a stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin).  Objects that only implement
        ``read`` are accepted too; their content is split on newlines.

    Keyword arguments read by this function are *model*, *subset*, *chain*,
    *altloc* (default ``'A'``), *header* (default **False**), *title*
    (default ``'Unknown'``) and *ag* (an existing :class:`.AtomGroup`).

    When *model* is ``0`` nothing is parsed and **None** is returned.
    Otherwise the atom group is returned, or **None** in its place when no
    atoms could be parsed.  When *header* is true and header data was
    parsed, a tuple ``(ag, StarDict)`` is returned instead.

    :raises TypeError: if *model* is not an integer, *subset* or *chain* is
        not a string, or *ag* is not an :class:`.AtomGroup`
    :raises ValueError: if *model* is negative, *subset* is not a known
        subset, *chain* is an empty string, or the stream has no lines"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if not isinstance(model, int):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            # 0 is a legal value (it skips coordinate parsing), so the
            # message must not claim that model has to exceed 0.
            raise ValueError('model must be greater than or equal to 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    # The title is needed both for a new AtomGroup and for the StarDict.
    title = str(kwargs.get('title', 'Unknown'))

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    if model == 0:
        # Nothing is parsed when model 0 is requested.
        return None

    LOGGER.timeit()
    try:
        lines = stream.readlines()
    except AttributeError as err:
        # Fall back to ``read`` for stream-like objects without
        # ``readlines``; report the original error if that fails as well.
        try:
            lines = stream.read().split('\n')
        except AttributeError:
            raise err
    if not lines:
        raise ValueError('empty CIF file or stream')

    # Keep the parsed header apart from the caller's boolean flag.
    parsed_header = None
    if header:
        ag, parsed_header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                             altloc, header)
    else:
        ag = _parseMMCIFLines(ag, lines, model, chain, subset, altloc,
                              header)

    if ag.numAtoms() > 0:
        # '%.2fs' is left unformatted on purpose: only the {0}/{1} fields
        # are filled in here.
        LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                      'parsed in %.2fs.'.format(
                          ag.numAtoms(),
                          ag.numCoordsets() - n_csets))
    else:
        ag = None
        LOGGER.warn('Atomic data could not be parsed, please '
                    'check the input file.')

    if parsed_header:
        return ag, StarDict(*parsed_header, title=title)
    return ag
Example #12
0
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the file name without its directory and
    extension is used as the title of the :class:`.AtomGroup` instance.  An
    :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as
    the file.  Data from PSF file will be added to the *ag*.  This may
    overwrite present data if it overlaps with PSF file content.  Note that
    this function does not evaluate angles, dihedrals, and impropers
    sections.

    :raises TypeError: if *ag* is not an :class:`.AtomGroup` instance
    :raises ValueError: if *ag* and the file differ in number of atoms
    :raises IOError: if the ``!NATOM`` or ``!NBOND:`` header is missing, the
        file holds fewer atom lines than announced, or the number of parsed
        bond indices does not match the announced number of bonds"""

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError('ag must be an AtomGroup instance')

    def _text(raw):
        # The file is opened in binary mode; decode so that the comparisons
        # against text markers below work with bytes as well as str input.
        return raw.decode('utf-8') if isinstance(raw, bytes) else raw

    psf = openFile(filename, 'rb')
    try:
        # Skip ahead to the "<count> !NATOM" header line.
        n_atoms = None
        line = psf.readline()
        while line:
            line = _text(line).strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM section could not be found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Read exactly one line per atom.  Counting lines rather than
        # estimating a byte budget keeps files with long atom lines from
        # being cut short.
        for i in range(n_atoms):
            line = _text(psf.readline())
            if not line:
                raise IOError('number of lines in PSF is less than the '
                              'number of atoms')
            if len(line) <= 71:
                # Short lines are read by fixed column positions.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Longer lines are split on whitespace instead.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Locate the "<count> !NBOND: bonds" header line.
        n_bonds = None
        line = psf.readline()
        while line:
            items = _text(line).split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
            line = psf.readline()
        if n_bonds is None:
            raise IOError('!NBOND: section could not be found in PSF file')

        # Collect bond lines until enough indices were seen, the next
        # section header shows up, or the file ends.
        n_expected = n_bonds * 2
        n_seen = 0
        bond_lines = []
        while n_seen < n_expected:
            line = _text(psf.readline())
            if not line or '!' in line:
                break
            bond_lines.append(line)
            n_seen += len(line.split())

        array = fromstring(''.join(bond_lines), count=n_expected, dtype=int,
                           sep=' ')
        if len(array) != n_expected:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # Release the file handle on success and on every error path.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift every parsed bond index down by one, in place.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Example #13
0
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, the file name without its directory and
    extension is used as the title of the :class:`.AtomGroup` instance.  An
    :class:`.AtomGroup` instance may be provided as *ag* argument.  When
    provided, *ag* must have the same number of atoms in the same order as
    the file.  Data from PSF file will be added to the *ag*.  This may
    overwrite present data if it overlaps with PSF file content.  Note that
    this function does not evaluate angles, dihedrals, and impropers
    sections.

    :raises TypeError: if *ag* is not an :class:`.AtomGroup` instance
    :raises ValueError: if *ag* and the file differ in number of atoms
    :raises IOError: if the ``!NATOM`` header is missing, fewer atom lines
        than announced are found, or the number of parsed bond indices does
        not match the announced number of bonds"""

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError("ag must be an AtomGroup instance")

    # The file is read in binary mode, hence the bytes literals below.
    psf = openFile(filename, "rb")
    try:
        # Skip ahead to the "<count> !NATOM" header line.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b"!NATOM"):
                n_atoms = int(line.split(b"!")[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError("!NATOM section could not be found in PSF file")

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError("ag and PSF file must have same number of atoms")

        serials = zeros(n_atoms, ATOMIC_FIELDS["serial"].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS["segment"].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS["resnum"].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS["resname"].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS["name"].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS["type"].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS["charge"].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS["mass"].dtype)

        # Atom section: runs until the "!NBOND:" header is reached.
        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!NBOND:" in line.upper():
                n_bonds = int(line.split()[0])
                break
            if n >= n_atoms:
                # All announced atoms are stored; ignore further lines
                # until the bond header shows up.
                continue

            if len(line) <= 71:
                # Short lines are read by fixed column positions.
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # Longer lines are split on whitespace instead.
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError("number of lines in PSF is less than the number of "
                          "atoms")

        # Bond section: gather lines up to the next "!" section header.
        bond_lines = []
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!" in line:
                break
            bond_lines.append(line.decode(encoding="UTF-8"))

        array = fromstring("".join(bond_lines), count=n_bonds * 2, dtype=int,
                           sep=" ")
        if len(array) != n_bonds * 2:
            raise IOError("number of bonds expected and parsed do not match")
    finally:
        # Release the file handle on success and on every error path.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift every parsed bond index down by one, in place.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
Example #14
0
def parseCIFStream(stream, **kwargs):
    """Returns an :class:`.AtomGroup` containing atomic data parsed from a
    stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin).  Objects that only implement
        ``read`` are accepted too; their content is split on newlines.

    Keyword arguments read by this function are *model*, *subset*, *chain*,
    *altloc* (default ``'A'``), *title* (default ``'Unknown'``) and *ag*
    (an existing :class:`.AtomGroup`).

    When *model* is ``0`` the parsing branch is skipped and **None** is
    returned.  Otherwise the atom group is returned, or **None** when no
    atoms could be parsed.

    :raises TypeError: if *model* is not an integer, *subset* or *chain* is
        not a string, or *ag* is not an :class:`.AtomGroup`
    :raises ValueError: if *model* is negative, *subset* is not a known
        subset, *chain* is an empty string, or the stream has no lines"""

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, int):
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
        if model < 0:
            # 0 is a legal value (it skips coordinate parsing), so the
            # message must not claim that model has to exceed 0.
            raise ValueError('model must be greater than or equal to 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            # Fall back to ``read`` for stream-like objects without
            # ``readlines``; report the original error if that fails too.
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not lines:
            raise ValueError('empty CIF file or stream')
        ag = _parseCIFLines(ag, lines, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            # '%.2fs' is left unformatted on purpose: only the {0}/{1}
            # fields are filled in here.
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(),
                              ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
        return ag