def expand_crystal(structure, n=1, name='XTAL'):
    """Builds a crystal of copies of ``structure``.

    Returns a ``StructureHolder`` entity instance holding one copy of the
    input structure per translation ``(u, v, w)``, where each of ``u``, ``v``
    and ``w`` runs over ``-n .. n``. Every copy is named
    ``<structure name>_<u><v><w>``.

    Arguments:

        - structure: ``Structure`` entity instance.
        - n: number of unit-cell layers added in each direction.
        - name: optional name of the returned holder.

    Requires a PDB file with correct CRYST1 field and space group information
    (the ``uc_fmx`` and ``uc_omx`` header entries are read).
    """
    header = structure.header
    base_name = structure.name
    uc_fmx = header['uc_fmx']
    uc_omx = header['uc_omx']

    # coordinates of the input atoms, wrapped in a list to add a leading axis
    source_atoms = einput(structure, 'A')
    base_coords = array([source_atoms.getData('coords')])

    # coordinates for every translated copy
    crystal_coords = coords_to_crystal(base_coords, uc_fmx, uc_omx, n)

    crystal = StructureHolder(name)
    shifts = range(-n, n + 1)  # e.g. -2, -1, 0, 1, 2
    cell = 0  # running index into the first axis of crystal_coords
    for u in shifts:
        for v in shifts:
            for w in shifts:
                replica = copy(structure)
                replica_atoms = einput(replica, 'A')
                # pair atom ids of the input with the coordinates of this cell
                pairs = izip(source_atoms.keys(), crystal_coords[cell, 0])
                for atom_id, xyz in pairs:
                    replica_atoms[atom_id].coords = xyz
                replica.setName("%s_%s%s%s" % (base_name, u, v, w))
                crystal.addChild(replica)
                cell += 1
    return crystal
def _prepare_asa(entities, symmetry_mode=None, crystal_mode=None, points=960,
                 **kwargs):
    """Prepares the atomic solvent-accessible surface area (ASA) calculation.

    Arguments:

        - entities: input entities for ASA calculation (most commonly a
          structure entity).
        - symmetry_mode (str): One of 'uc', 'bio' or 'table'. This defines the
          transformations applied to the coordinates of the input entities.
          'bio' and 'uc' are transformations to create the biological molecule
          or unit-cell from the PDB header. 'table' uses transformation
          matrices derived from space-group information only, using
          crystallographic tables (requires ``cctbx``).
        - crystal_mode (int): Defines the number of unit-cells to expand the
          initial unit-cell into, i.e. the number of unit-cells in each
          direction: 1 makes a total of 27 unit cells
          ((-1, 0, 1) == 3, 3^3 == 27). Only used together with the 'uc' and
          'table' symmetry modes; it is ignored for 'bio'.
        - points: number of points on atom spheres; higher is slower but more
          accurate.

    Additional keyworded arguments are passed to the ``_run_asa`` function.

    Returns a dict mapping the full id of each atom to the corresponding entry
    of the array returned by ``_run_asa``.

    Raises ``ValueError`` if ``symmetry_mode`` is not one of the supported
    modes, or if ``crystal_mode`` is given without a 'uc' or 'table'
    ``symmetry_mode`` (these cases used to return an empty dict silently).
    """
    # generate uniform points on the unit-sphere
    spoints = sphere_points(points)
    # prepare entities for asa calculation
    result = {}
    atoms = einput(entities, 'A')

    if not symmetry_mode and not crystal_mode:
        # free-floating area mode
        coords = array(atoms.getData('coords', forgiving=False))
        coords = array([[coords]])  # fake 3D and 4D
        idx_to_id = dict(enumerate(atoms.getData('getFull_id',
                                                 forgiving=False,
                                                 method=True)))
        asas = _run_asa(atoms, coords, spoints, **kwargs)
        for idx in xrange(asas.shape[0]):
            result[idx_to_id[idx]] = asas[idx]

    elif symmetry_mode in ('table', 'uc'):
        # crystal-contact area mode
        structure = einput(entities, 'S').values()[0]
        sh = structure.header
        coords = array(atoms.getData('coords', forgiving=False))
        idx_to_id = dict(enumerate(atoms.getData('getFull_id',
                                                 forgiving=False,
                                                 method=True)))
        # expand to unit-cell, real 3D
        coords = coords_to_symmetry(coords,
                                    sh[symmetry_mode + '_fmx'],
                                    sh[symmetry_mode + '_omx'],
                                    sh[symmetry_mode + '_mxs'],
                                    symmetry_mode)
        if crystal_mode:
            # expand to crystal, real 4D
            coords = coords_to_crystal(coords,
                                       sh[symmetry_mode + '_fmx'],
                                       sh[symmetry_mode + '_omx'],
                                       crystal_mode)
        else:
            coords = array([coords])  # fake 4D
        asas = _run_asa(atoms, coords, spoints, **kwargs)
        for idx in xrange(asas.shape[0]):
            result[idx_to_id[idx]] = asas[idx]

    elif symmetry_mode == 'bio':
        # biological area mode: each entry of the 'bio_cmx' header pairs a
        # group of chain ids with the number of 'bio_mxs' matrices applied to
        # those chains; the matrices are consumed consecutively.
        structure = einput(entities, 'S').values()[0]
        chains = einput(entities, 'C')
        sh = structure.header
        start = 0
        for chain_ids, mx_num in sh['bio_cmx']:
            sel = chains.selectChildren(chain_ids, 'contains', 'id').values()
            atoms = einput(sel, 'A')
            coords = array(atoms.getData('coords', forgiving=False))
            idx_to_id = dict(enumerate(atoms.getData('getFull_id',
                                                     forgiving=False,
                                                     method=True)))
            stop = start + mx_num
            coords = coords_to_symmetry(coords,
                                        sh['uc_fmx'],
                                        sh['uc_omx'],
                                        sh['bio_mxs'][start:stop],
                                        symmetry_mode)
            coords = array([coords])  # fake 4D
            start = stop
            asas = _run_asa(atoms, coords, spoints, **kwargs)
            for idx in xrange(asas.shape[0]):
                result[idx_to_id[idx]] = asas[idx]

    elif not symmetry_mode:
        # crystal_mode was requested but there is no unit-cell to expand
        raise ValueError("crystal_mode requires symmetry_mode 'uc' or 'table'")

    else:
        raise ValueError("Unsupported symmetry_mode: %s" % symmetry_mode)

    return result
def _prepare_contacts(query, model=None, level='A', search_limit=6.0,
                      contact_mode='diff_chain', symmetry_mode=None,
                      crystal_mode=None, **kwargs):
    """Prepares distance contact calculations.

    Arguments:

        - query(entitie[s]): query entitie[s] for contact calculation
          (most commonly a structure entity).
        - model(entity): a Model entity which will be transformed according to
          symmetry_mode and crystal_mode. (most commonly it is the same as the
          query). Defaults to the first model of the query structure.
        - level(str): The level in the hierarchy at which distances will be
          calculated (most commonly 'A' for atoms)
        - search_limit(float): maximum distance in Angstroms
        - contact_mode(str): One of "diff_asu", "diff_sym", "diff_chain".
          Defines the allowed contacts i.e. requires that contacts are by
          entities, which have: "diff_asu" different unit cells; "diff_sym"
          different symmetry operators (if in the same unit cell);
          "diff_chain" different chain ids (if in the same unit cell and
          symmetry). The name is translated to the integer code 0, 1 or 2
          that is handed to ``cnt_loop``.
        - symmetry_mode (str): One of 'uc' or 'table'. This defines the
          transformations applied to the coordinates of the lattice entities.
          'uc' uses the transformations from the PDB header to create the
          unit-cell; 'table' uses transformation matrices derived from
          space-group information only, using crystallographic tables
          (requires ``cctbx``). 'bio' is not supported by this function.
        - crystal_mode (int): Defines the number of unit-cells to expand the
          initial unit-cell into, i.e. the number of unit cells in each
          direction: 1 makes a total of 27 unit cells
          ((-1, 0, 1) == 3, 3^3 == 27). Requires a symmetry_mode.

    Additional arguments are passed to the ``cnt_loop`` Cython function.

    Returns a ``defaultdict`` mapping the full id of a query entity to a dict
    which maps the full id of a lattice entity to a tuple
    ``(sqrt(n_dst), n_tra, n_sym)`` built from the ``cnt_loop`` output
    (presumably distance, translation index and symmetry-operator index).

    Raises ``KeyError`` for an unknown ``contact_mode`` and ``ValueError`` for
    an unsupported ``symmetry_mode`` or an unusable ``crystal_mode``.
    """
    contact_mode = {'diff_asu': 0,
                    'diff_sym': 1,
                    'diff_chain': 2}[contact_mode]
    # determine unique structure
    structure = einput(query, 'S').values()[0]
    sh = structure.header
    # if not specified otherwise the lattice is the first model
    lattice = model or structure[(0,)]
    lents = einput(lattice, level)
    lents_ids = lents.getData('getFull_id', forgiving=False, method=True)
    lcoords = array(lents.getData('coords', forgiving=False))
    qents = einput(query, level)
    qents_ids = qents.getData('getFull_id', forgiving=False, method=True)
    qcoords = array(qents.getData('coords', forgiving=False))

    if symmetry_mode:
        if symmetry_mode not in ('table', 'uc'):
            # this includes 'bio', which is not implemented for contacts
            raise ValueError("Unsupported symmetry_mode: %s" % symmetry_mode)
        lcoords = coords_to_symmetry(lcoords,
                                     sh[symmetry_mode + '_fmx'],
                                     sh[symmetry_mode + '_omx'],
                                     sh[symmetry_mode + '_mxs'],
                                     symmetry_mode)
    else:
        lcoords = array([lcoords])  # fake 3D

    if crystal_mode:
        if not symmetry_mode:
            raise ValueError(
                'crystal_mode requires symmetry_mode "uc" or "table"')
        n_layers = int(crystal_mode)
        if n_layers != crystal_mode or n_layers < 1:
            raise ValueError("crystal_mode must be a positive integer: %r"
                             % (crystal_mode,))
        # Index of the 0,0,0 translation, i.e. the middle one of the
        # (2n + 1)^3 translations: 13, 62, 171, 364, ... for n = 1, 2, 3, 4.
        # These are the thickened cube numbers
        # a(n) = n*(n^2+(n-1)^2)+(n-1)*2*n*(n-1) = 1, 14, 63, 172, 365, ...
        # shifted to a zero-based index.
        zero_tra = ((2 * n_layers + 1) ** 3 - 1) // 2
        lcoords = coords_to_crystal(lcoords,
                                    sh[symmetry_mode + '_fmx'],
                                    sh[symmetry_mode + '_omx'],
                                    crystal_mode)
    else:
        zero_tra = 0
        lcoords = array([lcoords])  # fake 4D

    # flatten the three leading axes into one list of coordinates;
    # shape[1] and shape[2] are handed to cnt_loop so it can recover indices
    shape = lcoords.shape
    lcoords = lcoords.reshape((shape[0] * shape[1] * shape[2], shape[3]))
    box = r_[qcoords.min(axis=0) - search_limit,
             qcoords.max(axis=0) + search_limit]

    # translate chain ids (third item of a full id) into integer codes
    lchains = [full_id[2] for full_id in lents_ids]
    qchains = [full_id[2] for full_id in qents_ids]
    allchains = set(lchains)
    allchains.update(qchains)
    chain2id = dict((chain, idx) for idx, chain in enumerate(allchains))
    lc = array([chain2id[chain] for chain in lchains], dtype=int64)  # lattice
    qc = array([chain2id[chain] for chain in qchains], dtype=int64)  # query

    # here we leave python
    (idxc, n_src, n_asu, n_sym, n_tra, n_dst) = cnt_loop(
        qcoords, lcoords, qc, lc, shape[1], shape[2],
        zero_tra, contact_mode, search_limit, box,
        **kwargs)

    result = defaultdict(dict)
    for contact in xrange(idxc):
        qent_id = qents_ids[n_src[contact]]
        lent_id = lents_ids[n_asu[contact]]
        result[qent_id][lent_id] = (sqrt(n_dst[contact]),
                                    n_tra[contact],
                                    n_sym[contact])
    return result
def _prepare_contacts(query, model=None, level='A', search_limit=6.0,
                      contact_mode='diff_chain', symmetry_mode=None,
                      crystal_mode=None, **kwargs):
    """Prepares distance contact calculations.

    Arguments:

        - query(entitie[s]): query entitie[s] for contact calculation
          (most commonly a structure entity).
        - model(entity): a Model entity which will be transformed according to
          symmetry_mode and crystal_mode. (most commonly it is the same as the
          query). Defaults to the first model of the query structure.
        - level(str): The level in the hierarchy at which distances will be
          calculated (most commonly 'A' for atoms)
        - search_limit(float): maximum distance in Angstroms
        - contact_mode(str): One of "diff_asu", "diff_sym", "diff_chain".
          Defines the allowed contacts i.e. requires that contacts are by
          entities, which have: "diff_asu" different unit cells; "diff_sym"
          different symmetry operators (if in the same unit cell);
          "diff_chain" different chain ids (if in the same unit cell and
          symmetry). The name is translated to the integer code 0, 1 or 2
          that is handed to ``cnt_loop``.
        - symmetry_mode (str): One of 'uc' or 'table'. This defines the
          transformations applied to the coordinates of the lattice entities.
          'uc' uses the transformations from the PDB header to create the
          unit-cell; 'table' uses transformation matrices derived from
          space-group information only, using crystallographic tables
          (requires ``cctbx``). 'bio' is not supported by this function.
        - crystal_mode (int): Defines the number of unit-cells to expand the
          initial unit-cell into, i.e. the number of unit cells in each
          direction: 1 makes a total of 27 unit cells
          ((-1, 0, 1) == 3, 3^3 == 27). Requires a symmetry_mode.

    Additional arguments are passed to the ``cnt_loop`` Cython function.

    Returns a ``defaultdict`` mapping the full id of a query entity to a dict
    which maps the full id of a lattice entity to a tuple
    ``(sqrt(n_dst), n_tra, n_sym)`` built from the ``cnt_loop`` output
    (presumably distance, translation index and symmetry-operator index).

    Raises ``KeyError`` for an unknown ``contact_mode`` and ``ValueError`` for
    an unsupported ``symmetry_mode`` or an unusable ``crystal_mode``.
    """
    contact_mode = {'diff_asu': 0,
                    'diff_sym': 1,
                    'diff_chain': 2}[contact_mode]
    # determine unique structure
    structure = einput(query, 'S').values()[0]
    sh = structure.header
    # if not specified otherwise the lattice is the first model
    lattice = model or structure[(0,)]
    lents = einput(lattice, level)
    lents_ids = lents.getData('getFull_id', forgiving=False, method=True)
    lcoords = array(lents.getData('coords', forgiving=False))
    qents = einput(query, level)
    qents_ids = qents.getData('getFull_id', forgiving=False, method=True)
    qcoords = array(qents.getData('coords', forgiving=False))

    if symmetry_mode:
        if symmetry_mode not in ('table', 'uc'):
            # this includes 'bio', which is not implemented for contacts
            raise ValueError("Unsupported symmetry_mode: %s" % symmetry_mode)
        lcoords = coords_to_symmetry(lcoords,
                                     sh[symmetry_mode + '_fmx'],
                                     sh[symmetry_mode + '_omx'],
                                     sh[symmetry_mode + '_mxs'],
                                     symmetry_mode)
    else:
        lcoords = array([lcoords])  # fake 3D

    if crystal_mode:
        if not symmetry_mode:
            raise ValueError(
                'crystal_mode requires symmetry_mode "uc" or "table"')
        n_layers = int(crystal_mode)
        if n_layers != crystal_mode or n_layers < 1:
            raise ValueError("crystal_mode must be a positive integer: %r"
                             % (crystal_mode,))
        # Index of the 0,0,0 translation, i.e. the middle one of the
        # (2n + 1)^3 translations: 13, 62, 171, 364, ... for n = 1, 2, 3, 4.
        # These are the thickened cube numbers
        # a(n) = n*(n^2+(n-1)^2)+(n-1)*2*n*(n-1) = 1, 14, 63, 172, 365, ...
        # shifted to a zero-based index.
        zero_tra = ((2 * n_layers + 1) ** 3 - 1) // 2
        lcoords = coords_to_crystal(lcoords,
                                    sh[symmetry_mode + '_fmx'],
                                    sh[symmetry_mode + '_omx'],
                                    crystal_mode)
    else:
        zero_tra = 0
        lcoords = array([lcoords])  # fake 4D

    # flatten the three leading axes into one list of coordinates;
    # shape[1] and shape[2] are handed to cnt_loop so it can recover indices
    shape = lcoords.shape
    lcoords = lcoords.reshape((shape[0] * shape[1] * shape[2], shape[3]))
    box = r_[qcoords.min(axis=0) - search_limit,
             qcoords.max(axis=0) + search_limit]

    # translate chain ids (third item of a full id) into integer codes
    lchains = [full_id[2] for full_id in lents_ids]
    qchains = [full_id[2] for full_id in qents_ids]
    allchains = set(lchains)
    allchains.update(qchains)
    chain2id = dict((chain, idx) for idx, chain in enumerate(allchains))
    lc = array([chain2id[chain] for chain in lchains], dtype=int64)  # lattice
    qc = array([chain2id[chain] for chain in qchains], dtype=int64)  # query

    # here we leave python
    (idxc, n_src, n_asu, n_sym, n_tra, n_dst) = cnt_loop(
        qcoords, lcoords, qc, lc, shape[1], shape[2],
        zero_tra, contact_mode, search_limit, box,
        **kwargs)

    result = defaultdict(dict)
    for contact in xrange(idxc):
        qent_id = qents_ids[n_src[contact]]
        lent_id = lents_ids[n_asu[contact]]
        result[qent_id][lent_id] = (sqrt(n_dst[contact]),
                                    n_tra[contact],
                                    n_sym[contact])
    return result