def parseEMDStream(stream, **kwargs):
    """Return an :class:`.AtomGroup` filled from a stream of EMD data.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); it is handed unchanged to
        :func:`_parseEMDLines`

    :arg cutoff: converted with :func:`float`, default is ``1.20``

    :arg n_nodes: converted with :func:`int`, default is ``1000``

    :arg num_iter: converted with :func:`int`, default is ``20``

    :arg ag: existing :class:`.AtomGroup` to parse into; a new one is
        created when the keyword is absent

    :arg title: title of a newly created atom group, default is
        ``'Unknown'``

    :returns: the atom group that was passed to :func:`_parseEMDLines`

    :raises TypeError: when *ag* is given but is not an :class:`.AtomGroup`
    """
    cutoff = float(kwargs.get('cutoff', 1.20))
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))

    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        # Remember the pre-existing coordinate sets so that the report
        # below only counts the ones added by this call.
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')))
        n_csets = 0

    LOGGER.warn('Building coordinates from electron density map. '
                'This may take a while.')
    LOGGER.timeit()
    # The return value is not used; the atom group passed in is the
    # object that gets reported on and returned.
    _parseEMDLines(ag, stream, cutoff=cutoff, n_nodes=n_nodes,
                   num_iter=num_iter, format='EMD')
    # NOTE(review): '%.2fs' is left untouched by str.format; presumably
    # LOGGER.report fills in the elapsed time - confirm.
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(ag.numAtoms(),
                                            ag.numCoordsets() - n_csets))
    return ag
def parsePQR(filename, **kwargs):
    """Return an :class:`.AtomGroup` containing data parsed from a PQR file.

    :arg filename: a PQR filename
    :type filename: str

    :arg title: title of a newly created atom group; ``name`` is accepted
        as a fallback keyword.  When neither is given, the lower-cased file
        name without its extension (and without a trailing ``.gz``) is used.

    :arg chain: non-empty string, forwarded to :func:`_parsePDBLines` and
        appended to the title as ``'_' + chain``

    :arg subset: key of ``_PDBSubsets``, looked up in lower case; the
        mapped value is forwarded to :func:`_parsePDBLines` and appended to
        the title

    :arg ag: existing :class:`.AtomGroup` to parse into; a new one is
        created when the keyword is absent

    :arg max_n_atoms: forwarded to :func:`_parsePDBLines`, default is
        ``1e5``

    :returns: the parsed atom group, or **None** when it holds no atoms

    :raises IOError: when *filename* is not an existing file
    :raises TypeError: when *subset* or *chain* is not a string, or *ag*
        is not an :class:`.AtomGroup`
    :raises ValueError: when *subset* is not a key of ``_PDBSubsets`` or
        *chain* is an empty string
    """
    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    max_n_atoms = kwargs.get('max_n_atoms', 1e5)

    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))

    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            # Strip the second extension of names such as "x.pqr.gz".
            fn, ext = os.path.splitext(fn)
        title = fn.lower()

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    try:
        lines = pqr.readlines()
    finally:
        # Close the handle even when reading fails, so it is not leaked.
        pqr.close()

    LOGGER.timeit()
    ag = _parsePDBLines(ag, lines, split=0, model=1, chain=chain,
                        subset=subset, altloc_torf=False, format='pqr',
                        max_n_atoms=max_n_atoms)
    if ag.numAtoms() > 0:
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                                                ag.numCoordsets() - n_csets))
        return ag
    return None
def parsePQR(filename, **kwargs):
    """Return an :class:`.AtomGroup` containing data parsed from a PQR file.

    :arg filename: a PQR filename
    :type filename: str

    :arg title: title of a newly created atom group; ``name`` is accepted
        as a fallback keyword.  When neither is given, the lower-cased file
        name without its extension (and without a trailing ``.gz``) is used.

    :arg chain: non-empty string, forwarded to :func:`_parsePDBLines` and
        appended to the title as ``'_' + chain``

    :arg subset: key of ``_PDBSubsets``, looked up in lower case; the
        mapped value is forwarded to :func:`_parsePDBLines` and appended to
        the title

    :arg ag: existing :class:`.AtomGroup` to parse into; a new one is
        created when the keyword is absent

    :returns: the parsed atom group, or **None** when it holds no atoms

    :raises IOError: when *filename* is not an existing file
    :raises TypeError: when *subset* or *chain* is not a string, or *ag*
        is not an :class:`.AtomGroup`
    :raises ValueError: when *subset* is not a key of ``_PDBSubsets`` or
        *chain* is an empty string
    """
    title = kwargs.get('title', kwargs.get('name'))
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')

    if not os.path.isfile(filename):
        raise IOError('No such file: {0}'.format(repr(filename)))

    if title is None:
        fn, ext = os.path.splitext(os.path.split(filename)[1])
        if ext == '.gz':
            # Strip the second extension of names such as "x.pqr.gz".
            fn, ext = os.path.splitext(fn)
        title = fn.lower()

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    else:
        ag = AtomGroup(title + title_suffix)
        n_csets = 0

    pqr = openFile(filename, 'rt')
    try:
        lines = pqr.readlines()
    finally:
        # Close the handle even when reading fails, so it is not leaked.
        pqr.close()

    LOGGER.timeit()
    ag = _parsePDBLines(ag, lines, split=0, model=1, chain=chain,
                        subset=subset, altloc_torf=False, format='pqr')
    if ag.numAtoms() > 0:
        LOGGER.report('{0} atoms and {1} coordinate sets were '
                      'parsed in %.2fs.'.format(ag.numAtoms(),
                                                ag.numCoordsets() - n_csets))
        return ag
    return None
def parseEMDStream(stream, **kwargs):
    """Parse a stream of EMD data via :func:`_parseEMDLines`.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    :arg cutoff: converted with :func:`float` unless it is **None**
        (the default)

    :arg n_nodes: default is ``0``; a value greater than zero switches
        *make_nodes* on and is converted with :func:`int`

    :arg num_iter: converted with :func:`int`, default is ``20``

    :arg map: default is **False**

    :arg make_nodes: default is **False**

    :arg title: title of the atom group, default is ``'Unknown'``

    :arg title_suffix: appended to the title, default is ``''``

    When both *map* and *make_nodes* are **False**, a warning is logged
    and *make_nodes* is switched on with ``n_nodes=1000``.

    :returns: ``(emd, atomgroup)`` when *make_nodes* and *map* are both
        true, the atom group alone when only *make_nodes* is true, and
        otherwise whatever :func:`_parseEMDLines` returns
    """
    raw_cutoff = kwargs.get('cutoff', None)
    cutoff = None if raw_cutoff is None else float(raw_cutoff)

    node_count = kwargs.get('n_nodes', 0)
    iterations = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', False)
    build_nodes = kwargs.get('make_nodes', False)

    # Asking for a positive number of nodes implies building them.
    if node_count > 0:
        build_nodes = True
        node_count = int(node_count)

    if want_map is False and build_nodes is False:
        LOGGER.warn(
            'At least one of map and make_nodes should be True. '
            'Setting map to False was an intentional change from the default '
            'behaviour so make_nodes has been set to True with n_nodes=1000.')
        build_nodes = True
        node_count = 1000

    suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
    atomgroup._n_atoms = node_count

    # Every branch forwards the same keyword arguments.
    options = dict(cutoff=cutoff, n_nodes=node_count, num_iter=iterations,
                   map=want_map, make_nodes=build_nodes)

    if not build_nodes:
        return _parseEMDLines(atomgroup, stream, **options)

    LOGGER.info(
        'Building coordinates from electron density map. This may take a while.'
    )
    LOGGER.timeit()
    parsed = _parseEMDLines(atomgroup, stream, **options)
    if want_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed
    LOGGER.report('{0} pseudoatoms were fitted in %.2fs.'.format(
        atomgroup.numAtoms(), atomgroup.numCoordsets()))

    if want_map:
        return emd, atomgroup
    return atomgroup
def parseEMDStream(stream, **kwargs):
    """Parse a stream of EMD data via :func:`_parseEMDLines`.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    :arg cutoff: converted with :func:`float` unless it is **None**
        (the default)

    :arg n_nodes: converted with :func:`int`, default is ``1000``

    :arg num_iter: converted with :func:`int`, default is ``20``

    :arg return_map: default is **False**

    :arg make_nodes: default is **False**

    :arg title: title of the atom group, default is ``'Unknown'``

    :arg title_suffix: appended to the title, default is ``''``

    When both *return_map* and *make_nodes* are **False**, a warning is
    logged and *return_map* is switched on.

    :returns: ``(emd, atomgroup)`` when *make_nodes* and *return_map* are
        both true, the atom group alone when only *make_nodes* is true, and
        otherwise whatever :func:`_parseEMDLines` returns
    """
    cutoff = kwargs.get('cutoff', None)
    if cutoff is not None:
        cutoff = float(cutoff)
    n_nodes = int(kwargs.get('n_nodes', 1000))
    num_iter = int(kwargs.get('num_iter', 20))
    return_map = kwargs.get('return_map', False)
    make_nodes = kwargs.get('make_nodes', False)

    if return_map is False and make_nodes is False:
        LOGGER.warn('At least one of return_map and make_nodes should be True. '
                    'Setting make_nodes to False was an intentional change from the default '
                    'so return_map has been set to True.')
        # Update the local that is forwarded below.  Writing only to
        # ``kwargs`` left ``return_map`` False, so the parser was still
        # called with both options disabled despite the warning.
        return_map = True

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)

    if not make_nodes:
        return _parseEMDLines(atomgroup, stream, cutoff=cutoff,
                              n_nodes=n_nodes, num_iter=num_iter,
                              return_map=return_map, make_nodes=make_nodes)

    LOGGER.info('Building coordinates from electron density map. '
                'This may take a while.')
    LOGGER.timeit()
    parsed = _parseEMDLines(atomgroup, stream, cutoff=cutoff,
                            n_nodes=n_nodes, num_iter=num_iter,
                            return_map=return_map, make_nodes=make_nodes)
    if return_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(atomgroup.numAtoms(),
                                            atomgroup.numCoordsets()))

    if return_map:
        return emd, atomgroup
    return atomgroup
def parseEMDStream(stream, **kwargs):
    """Parse a stream of EMD data via :func:`_parseEMDLines`.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    :arg cutoff: converted with :func:`float` unless it is **None**
        (the default)

    :arg n_nodes: converted with :func:`int`, default is ``1000``

    :arg num_iter: converted with :func:`int`, default is ``20``

    :arg map: default is **True**

    :arg make_nodes: default is **False**

    :arg title: title of the atom group, default is ``'Unknown'``

    :arg title_suffix: appended to the title, default is ``''``

    When both *map* and *make_nodes* are **False**, a warning is logged
    and *make_nodes* is switched on.

    :returns: ``(emd, atomgroup)`` when *make_nodes* and *map* are both
        true, the atom group alone when only *make_nodes* is true, and
        otherwise whatever :func:`_parseEMDLines` returns
    """
    raw_cutoff = kwargs.get('cutoff', None)
    cutoff = None if raw_cutoff is None else float(raw_cutoff)

    node_count = int(kwargs.get('n_nodes', 1000))
    iterations = int(kwargs.get('num_iter', 20))
    want_map = kwargs.get('map', True)
    build_nodes = kwargs.get('make_nodes', False)

    if want_map is False and build_nodes is False:
        LOGGER.warn('At least one of map and make_nodes should be True. '
                    'Setting map to False was an intentional change from the default '
                    'behaviour so make_nodes has been set to True.')
        build_nodes = True

    suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
    atomgroup._n_atoms = node_count

    # Every branch forwards the same keyword arguments.
    options = dict(cutoff=cutoff, n_nodes=node_count, num_iter=iterations,
                   map=want_map, make_nodes=build_nodes)

    if not build_nodes:
        return _parseEMDLines(atomgroup, stream, **options)

    LOGGER.info('Building coordinates from electron density map. '
                'This may take a while.')
    LOGGER.timeit()
    parsed = _parseEMDLines(atomgroup, stream, **options)
    if want_map:
        emd, atomgroup = parsed
    else:
        atomgroup = parsed
    LOGGER.report('{0} atoms and {1} coordinate sets were '
                  'parsed in %.2fs.'.format(atomgroup.numAtoms(),
                                            atomgroup.numCoordsets()))

    if want_map:
        return emd, atomgroup
    return atomgroup
def parsePDBStream(stream, **kwargs):
    """Return an :class:`.AtomGroup` and/or a header dictionary parsed
    from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); ``read`` is tried when
        ``readlines`` is missing

    :arg model: **None** (default) or a non-negative integer; ``0`` skips
        parsing of atomic data

    :arg header: boolean, default is **False**

    :arg chain: non-empty string, prepended to the title suffix

    :arg subset: key of ``_PDBSubsets``, looked up in lower case

    :arg altloc: forwarded to :func:`_parsePDBLines`, default is ``'A'``

    :arg ag: existing :class:`.AtomGroup` to parse into (removed from
        *kwargs*); **None** is treated like an absent keyword

    :arg title: title of a newly created atom group, default is
        ``'Unknown'``

    :arg biomol: when true, :func:`buildBiomolecules` is applied to the
        parsed atoms, default is **False**

    :arg secondary: when true, :func:`assignSecstr` is applied; when
        false, the ``auto_secondary`` entry of ``SETTINGS`` decides and a
        :exc:`ValueError` raised by :func:`assignSecstr` is ignored

    :returns: the header data alone when *model* is ``0``, ``(atoms,
        header data)`` when *header* is true, and the atoms otherwise.
        The atoms are **None** when no atom was parsed.

    :raises TypeError: for a non-integer *model*, a non-string *subset*
        or *chain*, or an *ag* that is not an :class:`.AtomGroup`
    :raises ValueError: for a negative *model*, an unknown *subset*, an
        empty *chain*, or an empty stream
    """
    model = kwargs.get('model')
    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        suffix = chain + suffix

    # model == 0 means that no atomic data is parsed.
    parse_atoms = model != 0

    ag = kwargs.pop('ag', None)
    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif parse_atoms:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
        n_csets = 0

    biomol = kwargs.get('biomol', False)
    secondary = kwargs.get('secondary')
    auto_secondary = None
    if not secondary:
        # Fall back to the configured default when not requested.
        auto_secondary = SETTINGS.get('auto_secondary')
        secondary = auto_secondary

    split = 0
    header_dict = None
    if parse_atoms:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if len(lines) == 0:
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            header_dict, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(), ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        header_dict, split = getHeaderDict(stream)

    if ag is not None and isinstance(header_dict, dict):
        if secondary:
            if auto_secondary:
                # Automatic assignment is best effort only.
                try:
                    ag = assignSecstr(header_dict, ag)
                except ValueError:
                    pass
            else:
                ag = assignSecstr(header_dict, ag)
        if biomol:
            ag = buildBiomolecules(header_dict, ag)
            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if not parse_atoms:
        return header_dict
    if header:
        return ag, header_dict
    return ag
def parsePDBStream(stream, **kwargs):
    """Return an :class:`.AtomGroup` and/or a header dictionary parsed
    from a stream of PDB lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); ``read`` is tried when
        ``readlines`` is missing

    :arg model: **None** (default) or a non-negative integer; ``0`` skips
        parsing of atomic data

    :arg header: boolean, default is **False**

    :arg chain: non-empty string, prepended to the title suffix

    :arg subset: key of ``_PDBSubsets``, looked up in lower case

    :arg altloc: forwarded to :func:`_parsePDBLines`, default is ``'A'``

    :arg ag: existing :class:`.AtomGroup` to parse into; when the keyword
        is present its value must be an :class:`.AtomGroup`

    :arg title: title of a newly created atom group, default is
        ``'Unknown'``

    :arg biomol: when true, :func:`buildBiomolecules` is applied to the
        parsed atoms, default is **False**

    :arg secondary: when true, :func:`assignSecstr` is applied; when
        false, the ``auto_secondary`` entry of ``SETTINGS`` decides and a
        :exc:`ValueError` raised by :func:`assignSecstr` is ignored

    :returns: the header data alone when *model* is ``0``, ``(atoms,
        header data)`` when *header* is true, and the atoms otherwise.
        The atoms are **None** when no atom was parsed.

    :raises TypeError: for a non-integer *model*, a non-string *subset*
        or *chain*, or an *ag* that is not an :class:`.AtomGroup`
    :raises ValueError: for a negative *model*, an unknown *subset*, an
        empty *chain*, or an empty stream
    """
    model = kwargs.get('model')
    header = kwargs.get('header', False)
    assert isinstance(header, bool), 'header must be a boolean'
    chain = kwargs.get('chain')
    subset = kwargs.get('subset')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        if len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        suffix = chain + suffix

    # model == 0 means that no atomic data is parsed.
    atoms_wanted = model != 0

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif atoms_wanted:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + suffix)
        n_csets = 0

    biomol = kwargs.get('biomol', False)
    secondary = kwargs.get('secondary')
    auto_secondary = None
    if not secondary:
        # Fall back to the configured default when not requested.
        auto_secondary = SETTINGS.get('auto_secondary')
        secondary = auto_secondary

    split = 0
    hdict = None
    if atoms_wanted:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if len(lines) == 0:
            raise ValueError('empty PDB file or stream')
        if header or biomol or secondary:
            hdict, split = getHeaderDict(lines)
        _parsePDBLines(ag, lines, split, model, chain, subset, altloc)
        if ag.numAtoms() > 0:
            n_new = ag.numCoordsets() - n_csets
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(ag.numAtoms(), n_new))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    elif header:
        hdict, split = getHeaderDict(stream)

    if ag is not None and isinstance(hdict, dict):
        if secondary:
            if auto_secondary:
                # Automatic assignment is best effort only.
                try:
                    ag = assignSecstr(hdict, ag)
                except ValueError:
                    pass
            else:
                ag = assignSecstr(hdict, ag)
        if biomol:
            ag = buildBiomolecules(hdict, ag)
            if isinstance(ag, list):
                LOGGER.info('Biomolecular transformations were applied, {0} '
                            'biomolecule(s) are returned.'.format(len(ag)))
            else:
                LOGGER.info('Biomolecular transformations were applied to the '
                            'coordinate data.')

    if not atoms_wanted:
        return hdict
    if header:
        return ag, hdict
    return ag
def parseMMCIFStream(stream, **kwargs):
    """Return an :class:`.AtomGroup` and/or a :class:`.StarDict`
    containing header data parsed from a stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); ``read`` is tried when
        ``readlines`` is missing

    :arg model: **None** (default) or a non-negative integer (any
        :class:`numbers.Integral`); ``0`` skips parsing altogether

    :arg subset: key of ``_PDBSubsets``, looked up in lower case

    :arg chain: non-empty string, added to the title suffix as
        ``'_' + chain``

    :arg altloc: forwarded to :func:`_parseMMCIFLines`, default is ``'A'``

    :arg header: when true, header data is requested from
        :func:`_parseMMCIFLines`, default is **False**

    :arg ag: existing :class:`.AtomGroup` to parse into; when the keyword
        is present its value must be an :class:`.AtomGroup`

    :arg title: title of a newly created atom group and of the returned
        :class:`.StarDict`, default is ``'Unknown'``

    :returns: ``(atoms, StarDict)`` when header data is available, the
        atoms otherwise.  The atoms are **None** when no atom was parsed.

    :raises TypeError: for a non-integer *model*, a non-string *subset*
        or *chain*, or an *ag* that is not an :class:`.AtomGroup`
    :raises ValueError: for a negative *model*, an unknown *subset*, an
        empty *chain*, or an empty stream
    """
    # Imported locally so that the block does not depend on a module-level
    # import; Integral also admits integer types that do not subclass int.
    from numbers import Integral

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')
    header = kwargs.get('header', False)

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'.format(
                str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'.format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty PDB file or stream')

        if header:
            # ``header`` is rebound from the caller's flag to the parsed
            # header data; the final check below tests that data.
            ag, header = _parseMMCIFLines(ag, lines, model, chain, subset,
                                          altloc, header)
        else:
            ag = _parseMMCIFLines(ag, lines, model, chain, subset,
                                  altloc, header)

        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(), ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')

    # NOTE(review): with model=0 nothing is parsed, so ``header`` is still
    # the caller's flag here and unpacking it below fails when it is true;
    # header-only parsing looks unsupported - confirm the intended result.
    if header:
        return ag, StarDict(*header, title=str(kwargs.get('title', 'Unknown')))
    return ag
def parseCIFStream(stream, **kwargs):
    """Return an :class:`.AtomGroup` parsed from a stream of CIF lines.

    :arg stream: Anything that implements the method ``readlines``
        (e.g. :class:`file`, buffer, stdin); ``read`` is tried when
        ``readlines`` is missing

    :arg model: **None** (default) or a non-negative integer (any
        :class:`numbers.Integral`); ``0`` skips parsing altogether

    :arg subset: key of ``_PDBSubsets``, looked up in lower case

    :arg chain: non-empty string, added to the title suffix as
        ``'_' + chain``

    :arg altloc: forwarded to :func:`_parseCIFLines`, default is ``'A'``

    :arg ag: existing :class:`.AtomGroup` to parse into; when the keyword
        is present its value must be an :class:`.AtomGroup`

    :arg title: title of a newly created atom group, default is
        ``'Unknown'``

    :returns: the parsed atoms, or **None** when no atom was parsed.  With
        ``model=0`` the value of *ag* (or **None**) is returned untouched.

    :raises TypeError: for a non-integer *model*, a non-string *subset*
        or *chain*, or an *ag* that is not an :class:`.AtomGroup`
    :raises ValueError: for a negative *model*, an unknown *subset*, an
        empty *chain*, or an empty stream
    """
    # Imported locally so that the block does not depend on a module-level
    # import; Integral also admits integer types that do not subclass int.
    from numbers import Integral

    model = kwargs.get('model')
    subset = kwargs.get('subset')
    chain = kwargs.get('chain')
    altloc = kwargs.get('altloc', 'A')

    if model is not None:
        if not isinstance(model, Integral):
            raise TypeError('model must be an integer, {0} is invalid'
                            .format(str(model)))
        if model < 0:
            raise ValueError('model must be greater than 0')

    title_suffix = ''
    if subset:
        try:
            subset = _PDBSubsets[subset.lower()]
        except AttributeError:
            raise TypeError('subset must be a string')
        except KeyError:
            raise ValueError('{0} is not a valid subset'
                             .format(repr(subset)))
        title_suffix = '_' + subset
    if chain is not None:
        if not isinstance(chain, str):
            raise TypeError('chain must be a string')
        elif len(chain) == 0:
            raise ValueError('chain must not be an empty string')
        title_suffix = '_' + chain + title_suffix

    ag = None
    if 'ag' in kwargs:
        ag = kwargs['ag']
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')
        n_csets = ag.numCoordsets()
    elif model != 0:
        ag = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
        n_csets = 0

    if model != 0:
        LOGGER.timeit()
        try:
            lines = stream.readlines()
        except AttributeError as err:
            try:
                lines = stream.read().split('\n')
            except AttributeError:
                raise err
        if not len(lines):
            raise ValueError('empty PDB file or stream')

        ag = _parseCIFLines(ag, lines, model, chain, subset, altloc)

        if ag.numAtoms() > 0:
            LOGGER.report('{0} atoms and {1} coordinate set(s) were '
                          'parsed in %.2fs.'.format(
                              ag.numAtoms(), ag.numCoordsets() - n_csets))
        else:
            ag = None
            LOGGER.warn('Atomic data could not be parsed, please '
                        'check the input file.')
    return ag