Esempio n. 1
0
    def _parse(self, psffile):
        """Parse a psf file into a single Molecule.

        The format is detected from the first line of the file with
        ``self._find_psf_format``; when that returns False the 'NAMD'
        layout is assumed.  Every non-empty line inside a known section is
        handed to that section's line handler together with the molecule
        that is being filled.

        Args:
            psffile : str, path to the psf file
        Returns:
            a Molecule instance, or False when the file is missing or
            empty, the format is not supported, or a line can't be parsed

        """

        self.lgr.debug("parsing psf file: %s" % psffile)

        t1 = time.time()

        # check if the file exists
        if not os.path.exists(psffile):
            self.lgr.critical("file doesn't exist")
            return False

        # the molecule that the section handlers fill in
        mol = blocks.Molecule()

        # supported formats: section header -> how to read its lines
        # NOTE(review): 'n' and 'multiple' are interpreted by the handlers;
        # presumably the number of fields per record and whether one line
        # may hold several records - confirm against _atom_line/_badi_line
        psf_formats = {
            'NAMD': {
                'sections': {
                    '!NATOM': {
                        'type': 'atom',
                        'n': (9, 11),
                        'multiple': False,
                        'func': self._atom_line
                    },
                    '!NBOND': {
                        'type': 'bond',
                        'n': 2,
                        'multiple': True,
                        'func': self._badi_line
                    },
                    '!NTHETA': {
                        'type': 'angle',
                        'n': 3,
                        'multiple': True,
                        'func': self._badi_line
                    },
                    '!NPHI': {
                        'type': 'dihedral',
                        'n': 4,
                        'multiple': True,
                        'func': self._badi_line
                    },
                    '!NIMPHI': {
                        'type': 'improper',
                        'n': 4,
                        'multiple': True,
                        'func': self._badi_line
                    },
                    '!NCRTERM': {
                        'type': 'cmap',
                        'n': 8,
                        'multiple': False,
                        'func': self._badi_line
                    },
                },
            },
        }

        # only the first line is used to find the psf format
        with open(psffile) as f:
            first_line = next(f, None)

        # without this guard an empty file would leave the format undefined
        if first_line is None:
            self.lgr.error("psf file is empty")
            return False

        psffmt = self._find_psf_format(first_line)

        # check if the format is valid
        if psffmt is False:
            # assume the format is 'NAMD'
            psffmt = 'NAMD'

        elif psffmt not in psf_formats:
            self.lgr.error("psf format '%s' is not supported" % (psffmt))
            return False

        # parse the file
        sections = psf_formats[psffmt]['sections']
        _sec = None   # header of the section being read, None while skipping

        with open(psffile) as f:
            for line in f:
                line = line.strip()

                if not line:
                    continue

                if '!' in line:
                    # a header looks like '<count> !NAME[:] ...'; the name is
                    # the second token.  A line with a single token can't
                    # name a known section, so it is skipped, not indexed.
                    tokens = line.split()
                    header = tokens[1].strip(':') if len(tokens) > 1 else None
                    if header in sections:
                        _sec = header
                    else:
                        self.lgr.debug("skipping section: '%s'" % line)
                        _sec = None
                    continue

                if _sec is not None:
                    _conf = sections[_sec]
                    result = _conf['func'](psffmt, line, _conf, mol)
                    if result is False:
                        self.lgr.error(
                            "couldn't parse this line in '%s' section:\n  %s" %
                            (_sec, line))
                        return False

        # build chain and residues
        build_res_chain(mol)
        build_pairs(mol, 'charmm')

        t2 = time.time()
        self.lgr.debug("parsing took %4.1f seconds" % (t2 - t1))

        return mol
Esempio n. 2
0
    def split_psf(self):
        """Split the single psf Molecule into one Molecule per chain.

        Using this function only makes sense if the segments in the PSF file
        are not covalently bonded (usually this is the case): every bond,
        angle, dihedral, improper, cmap and pair must have all of its atoms
        in the same chain, otherwise the final consistency checks fail.

        Reads ``self.molecules``, which must hold exactly one Molecule, and
        replaces it with a tuple holding one new Molecule per chain.

        Returns:
           False if the chain names are not unique, otherwise None

        Raises:
           AssertionError: if ``self.molecules`` doesn't hold exactly one
               molecule, or if an atom/bond/angle/... of the original
               molecule was not assigned to any of the new molecules

        """

        assert len(self.molecules) == 1
        temp_mol = self.molecules[0]

        self.lgr.debug("converting psf to multiple molecules based on chains")

        unique_chains = {chain.name for chain in temp_mol.chains}
        if len(unique_chains) != len(temp_mol.chains):
            self.lgr.error("the name of the chains is not unique")
            return False

        # Atom:Chain.name dictionary for easy lookup
        _AC_map = {atom: atom.residue.chain.name for atom in temp_mol.atoms}

        # one new Molecule per chain; the chain names are unique (checked
        # above) so they can be used as keys
        molecules = []
        by_chain = {}
        _NA = 0   # number of atoms handed to the new molecules
        for chain in temp_mol.chains:
            m = blocks.Molecule()
            for res in chain.residues:
                for atom in res.atoms:
                    m.atoms.append(atom)
                    _NA += 1
            by_chain[chain.name] = m
            molecules.append(m)

        # (Molecule attribute, number of atoms an entry refers to); an entry
        # exposes its atoms as .atom1 ... .atomN
        entity_kinds = (
            ('bonds', 2),
            ('angles', 3),
            ('dihedrals', 4),
            ('impropers', 4),
            ('cmaps', 8),
            ('pairs', 2),
        )

        # hand every entry to the molecule of its chain in a single pass,
        # instead of rescanning all entries once per chain
        used = {}
        for attr, natoms in entity_kinds:
            atom_attrs = ['atom%d' % k for k in range(1, natoms + 1)]
            count = 0
            for item in getattr(temp_mol, attr):
                names = {_AC_map[getattr(item, name)] for name in atom_attrs}
                if len(names) != 1:
                    # atoms from different chains: belongs to no molecule
                    continue
                owner = by_chain.get(names.pop())
                if owner is not None:
                    getattr(owner, attr).append(item)
                    count += 1
            used[attr] = count

        for m in molecules:
            build_res_chain(m)
            m.renumber_atoms()

        # make sure we used all the entities in the temp_mol
        assert len(temp_mol.atoms) == _NA, \
            'atoms: %d != %d' % (len(temp_mol.atoms), _NA)
        for attr, _ in entity_kinds:
            total = len(getattr(temp_mol, attr))
            assert total == used[attr], \
                '%s: %d != %d' % (attr, total, used[attr])

        self.molecules = tuple(molecules)
Esempio n. 3
0
    def _parse(self, psffile):
        """Parse a psf file into a single Molecule.

        The format is detected from the first line of the file with
        ``self._find_psf_format``; when that returns False the 'NAMD'
        layout is assumed.  Every non-empty line inside a known section is
        handed to that section's line handler together with the molecule
        that is being filled.

        Args:
            psffile : str, path to the psf file
        Returns:
            a Molecule instance, or False when the file is missing or
            empty, the format is not supported, or a line can't be parsed

        """

        self.lgr.debug("parsing psf file: %s" % psffile)

        t1 = time.time()

        # check if the file exists
        if not os.path.exists(psffile):
            self.lgr.critical("file doesn't exist")
            return False

        # the molecule that the section handlers fill in
        mol = blocks.Molecule()

        # supported formats: section header -> how to read its lines
        # NOTE(review): 'n' and 'multiple' are interpreted by the handlers;
        # presumably the number of fields per record and whether one line
        # may hold several records - confirm against _atom_line/_badi_line
        psf_formats = {
            'NAMD': {
                'sections': {
                    '!NATOM':  {'type':'atom',    'n':(9,11),'multiple':False,'func':self._atom_line},
                    '!NBOND':  {'type':'bond',    'n':2,'multiple':True, 'func':self._badi_line},
                    '!NTHETA': {'type':'angle',   'n':3,'multiple':True, 'func':self._badi_line},
                    '!NPHI':   {'type':'dihedral','n':4,'multiple':True, 'func':self._badi_line},
                    '!NIMPHI': {'type':'improper','n':4,'multiple':True, 'func':self._badi_line},
                    '!NCRTERM':{'type':'cmap',    'n':8,'multiple':False,'func':self._badi_line},
                },
            },
        }

        # only the first line is used to find the psf format
        with open(psffile) as f:
            first_line = next(f, None)

        # without this guard an empty file would leave the format undefined
        if first_line is None:
            self.lgr.error("psf file is empty")
            return False

        psffmt = self._find_psf_format(first_line)

        # check if the format is valid
        if psffmt is False:
            # assume the format is 'NAMD'
            psffmt = 'NAMD'

        elif psffmt not in psf_formats:
            self.lgr.error("psf format '%s' is not supported" % (psffmt))
            return False

        # parse the file
        sections = psf_formats[psffmt]['sections']
        _sec = None   # header of the section being read, None while skipping

        with open(psffile) as f:
            for line in f:
                line = line.strip()

                if not line:
                    continue

                if '!' in line:
                    # a header looks like '<count> !NAME[:] ...'; the name is
                    # the second token.  A line with a single token can't
                    # name a known section, so it is skipped, not indexed.
                    tokens = line.split()
                    header = tokens[1].strip(':') if len(tokens) > 1 else None
                    if header in sections:
                        _sec = header
                    else:
                        self.lgr.debug("skipping section: '%s'" % line)
                        _sec = None
                    continue

                if _sec is not None:
                    _conf = sections[_sec]
                    result = _conf['func'](psffmt, line, _conf, mol)
                    if result is False:
                        self.lgr.error("couldn't parse this line in '%s' section:\n  %s" % (_sec, line))
                        return False

        # build chain and residues
        build_res_chain(mol)
        build_pairs(mol, 'charmm')

        t2 = time.time()
        self.lgr.debug("parsing took %4.1f seconds" % (t2-t1))

        return mol
Esempio n. 4
0
    def split_psf(self):
        """Split the single psf Molecule into one Molecule per chain.

        Using this function only makes sense if the segments in the PSF file
        are not covalently bonded (usually this is the case): every bond,
        angle, dihedral, improper, cmap and pair must have all of its atoms
        in the same chain, otherwise the final consistency checks fail.

        Reads ``self.molecules``, which must hold exactly one Molecule, and
        replaces it with a tuple holding one new Molecule per chain.

        Returns:
           False if the chain names are not unique, otherwise None

        Raises:
           AssertionError: if ``self.molecules`` doesn't hold exactly one
               molecule, or if an atom/bond/angle/... of the original
               molecule was not assigned to any of the new molecules

        """

        assert len(self.molecules) == 1
        temp_mol = self.molecules[0]

        self.lgr.debug("converting psf to multiple molecules based on chains")

        unique_chains = {chain.name for chain in temp_mol.chains}
        if len(unique_chains) != len(temp_mol.chains):
            self.lgr.error("the name of the chains is not unique")
            return False

        # Atom:Chain.name dictionary for easy lookup
        _AC_map = {atom: atom.residue.chain.name for atom in temp_mol.atoms}

        # one new Molecule per chain; the chain names are unique (checked
        # above) so they can be used as keys
        molecules = []
        by_chain = {}
        _NA = 0   # number of atoms handed to the new molecules
        for chain in temp_mol.chains:
            m = blocks.Molecule()
            for res in chain.residues:
                for atom in res.atoms:
                    m.atoms.append(atom)
                    _NA += 1
            by_chain[chain.name] = m
            molecules.append(m)

        # (Molecule attribute, number of atoms an entry refers to); an entry
        # exposes its atoms as .atom1 ... .atomN
        entity_kinds = (
            ('bonds', 2),
            ('angles', 3),
            ('dihedrals', 4),
            ('impropers', 4),
            ('cmaps', 8),
            ('pairs', 2),
        )

        # hand every entry to the molecule of its chain in a single pass,
        # instead of rescanning all entries once per chain
        used = {}
        for attr, natoms in entity_kinds:
            atom_attrs = ['atom%d' % k for k in range(1, natoms + 1)]
            count = 0
            for item in getattr(temp_mol, attr):
                names = {_AC_map[getattr(item, name)] for name in atom_attrs}
                if len(names) != 1:
                    # atoms from different chains: belongs to no molecule
                    continue
                owner = by_chain.get(names.pop())
                if owner is not None:
                    getattr(owner, attr).append(item)
                    count += 1
            used[attr] = count

        for m in molecules:
            build_res_chain(m)
            m.renumber_atoms()

        # make sure we used all the entities in the temp_mol
        assert len(temp_mol.atoms) == _NA, \
            'atoms: %d != %d' % (len(temp_mol.atoms), _NA)
        for attr, _ in entity_kinds:
            total = len(getattr(temp_mol, attr))
            assert total == used[attr], \
                '%s: %d != %d' % (attr, total, used[attr])

        self.molecules = tuple(molecules)
Esempio n. 5
0
    def _parse(self, pdbfile, guess_mols):
        """Parse a pdb file and fill ``self.atoms`` and ``self.molecules``.

        Atom properties are read from the ATOM/HETATM records that come
        before the first ENDMDL record.  From the records after it only the
        coordinates are read; they are appended, in file order, to the
        atoms created before, so every atom ends up with one (x, y, z)
        tuple per model.

        Args:
            pdbfile : str, path to the pdb file
            guess_mols : if true, a new Molecule is started whenever a
                HETATM record differs in residue name or number from the
                previous atom, or an ATOM record follows a HETATM record;
                otherwise all the atoms go into a single Molecule

        Returns:
            None.  Nothing is stored on ``self`` when the file is missing,
            holds no atom records, or a later model has more atom records
            than the first one.

        Raises:
            AssertionError: if the atoms don't all have the same number of
                coordinate sets at the end of the file

        """
        self.lgr.debug("parsing pdb file: %s" % pdbfile)

        t1 = time.time()

        if not os.path.exists(pdbfile):
            self.lgr.error("the pdbfile doesn't exist")
            return

        _first_model_finished = False
        atoms = []
        _i = 0   # index of the atom that gets the next coordinates (later models)
        _alt_loc_warning = False

        # read the file and create atoms list
        M = blocks.Molecule()
        molecules = [M]
        with open(pdbfile) as f:
            for line in f:
                line = line.strip()
                if line.startswith('ENDMDL'):
                    _first_model_finished = True   # set by first ENDMDL
                    _i = 0   # the next model starts again at the first atom
                    continue

                if not line.startswith(('ATOM', 'HETATM')):
                    continue

                if _first_model_finished:
                    # just read the coordinates
                    if _i >= len(atoms):
                        self.lgr.error(
                            "a model has more atoms than the first model")
                        return
                    c = map(float, (line[30:38], line[38:46], line[46:54]))
                    # stored as a tuple, like the coordinates of the first model
                    atoms[_i].coords.append(tuple(c))
                    _i += 1
                    continue

                a = blocks.Atom()
                a.flag = line[0:6].strip()
                a.number = self.conv_atom_number(line[6:11])
                a.name = line[12:16].strip()
                a.altloc = line[16].strip()
                if a.altloc:
                    _alt_loc_warning = True
                a.resname = line[17:21].strip()
                a.chain = line[21].strip()
                a.resnumb = int(line[22:26])
                c = map(float, (line[30:38], line[38:46], line[46:54]))
                a.coords = [tuple(c)]  # a list of (x,y,z) tuples, each tuple for one model

                #TODO occup, bfactor, ...

                if guess_mols and atoms:
                    previous = atoms[-1]
                    if line.startswith('HETATM'):
                        starts_new = (a.resname != previous.resname or
                                      a.resnumb != previous.resnumb)
                    else:  # ATOM
                        starts_new = previous.flag == 'HETATM'
                    if starts_new:
                        M = blocks.Molecule()
                        molecules.append(M)

                # record a in the local atoms and M.atoms
                atoms.append(a)
                M.atoms.append(a)

        if not atoms:
            self.lgr.warning("no atoms were found in the pdb file")
            return

        self.atoms = tuple(atoms)

        if _alt_loc_warning:
            self.lgr.warning("there are atom records with altloc flags - fix this")

        # make sure all the atomic coordinates are the same length
        n_models = len(atoms[0].coords)
        for a in atoms:
            assert len(a.coords) == n_models, \
                '%d != %d' % (len(a.coords), n_models)

        # build residue and chains
        for m in molecules:
            build_res_chain(m)

        self.molecules = tuple(molecules)

        t2 = time.time()
        self.lgr.debug("parsing took %4.1f seconds" % (t2-t1))
Esempio n. 6
0
    def _parse(self, pdbfile, guess_mols):
        """Parse a pdb file and fill ``self.atoms`` and ``self.molecules``.

        Atom properties are read from the ATOM/HETATM records that come
        before the first ENDMDL record.  From the records after it only the
        coordinates are read; they are appended, in file order, to the
        atoms created before, so every atom ends up with one (x, y, z)
        tuple per model.

        Args:
            pdbfile : str, path to the pdb file
            guess_mols : if true, a new Molecule is started whenever a
                HETATM record differs in residue name or number from the
                previous atom, or an ATOM record follows a HETATM record;
                otherwise all the atoms go into a single Molecule

        Returns:
            None.  Nothing is stored on ``self`` when the file is missing,
            holds no atom records, or a later model has more atom records
            than the first one.

        Raises:
            AssertionError: if the atoms don't all have the same number of
                coordinate sets at the end of the file

        """
        self.lgr.debug("parsing pdb file: %s" % pdbfile)

        t1 = time.time()

        if not os.path.exists(pdbfile):
            self.lgr.error("the pdbfile doesn't exist")
            return

        _first_model_finished = False
        atoms = []
        _i = 0  # index of the atom that gets the next coordinates (later models)
        _alt_loc_warning = False

        # read the file and create atoms list
        M = blocks.Molecule()
        molecules = [M]
        with open(pdbfile) as f:
            for line in f:
                line = line.strip()
                if line.startswith('ENDMDL'):
                    _first_model_finished = True  # set by first ENDMDL
                    _i = 0  # the next model starts again at the first atom
                    continue

                if not line.startswith(('ATOM', 'HETATM')):
                    continue

                if _first_model_finished:
                    # just read the coordinates
                    if _i >= len(atoms):
                        self.lgr.error(
                            "a model has more atoms than the first model")
                        return
                    c = map(float, (line[30:38], line[38:46], line[46:54]))
                    # stored as a tuple, like the coordinates of the first model
                    atoms[_i].coords.append(tuple(c))
                    _i += 1
                    continue

                a = blocks.Atom()
                a.flag = line[0:6].strip()
                a.number = self.conv_atom_number(line[6:11])
                a.name = line[12:16].strip()
                a.altloc = line[16].strip()
                if a.altloc:
                    _alt_loc_warning = True
                a.resname = line[17:21].strip()
                a.chain = line[21].strip()
                a.resnumb = int(line[22:26])
                c = map(float, (line[30:38], line[38:46], line[46:54]))
                # a list of (x,y,z) tuples, each tuple for one model
                a.coords = [tuple(c)]

                #TODO occup, bfactor, ...

                if guess_mols and atoms:
                    previous = atoms[-1]
                    if line.startswith('HETATM'):
                        starts_new = (a.resname != previous.resname or
                                      a.resnumb != previous.resnumb)
                    else:  # ATOM
                        starts_new = previous.flag == 'HETATM'
                    if starts_new:
                        M = blocks.Molecule()
                        molecules.append(M)

                # record a in the local atoms and M.atoms
                atoms.append(a)
                M.atoms.append(a)

        if not atoms:
            self.lgr.warning("no atoms were found in the pdb file")
            return

        self.atoms = tuple(atoms)

        if _alt_loc_warning:
            self.lgr.warning(
                "there are atom records with altloc flags - fix this")

        # make sure all the atomic coordinates are the same length
        n_models = len(atoms[0].coords)
        for a in atoms:
            assert len(a.coords) == n_models, \
                '%d != %d' % (len(a.coords), n_models)

        # build residue and chains
        for m in molecules:
            build_res_chain(m)

        self.molecules = tuple(molecules)

        t2 = time.time()
        self.lgr.debug("parsing took %4.1f seconds" % (t2 - t1))