# Command-line interface: one or more input files plus a mandatory prefix
# used to build the output file names.
parser = argparse.ArgumentParser(
    description="Collate databases into mutually exclusive sets. Please provide databases in correct order."
)
parser.add_argument(
    'files', metavar="file", type=str, nargs="+",
    help='name of file(s) to be combined, you may supply one or more.'
)
parser.add_argument(
    '-p', dest='prefix', metavar="prefix", type=str,
    help='Prefix of output name', required=True
)
args = parser.parse_args()

# Each entry is [set of stripped lines, bit flag].  Files are walked in
# reverse, so the LAST file on the command line receives the lowest bit.
files = []
for bit, f in enumerate(reversed(args.files)):
    # Close every input as soon as it has been read instead of leaking the
    # handle until garbage collection.
    with open(f) as handle:
        fset = {line.strip() for line in handle}
    files.append([fset, 1 << bit])

# Number of files read; kept under its original name in case later code
# reads it.
fileno = len(files)

# Walk from the largest combination (all files) down to single files.  After
# a combination's intersection is written, those lines are removed from every
# member set, so smaller combinations only ever see what is left over -- this
# is what makes the output sets mutually exclusive.
r = fileno
while r > 0:
    for combo in nCr(files, r):
        bitmask = sum(k[1] for k in combo)
        intsect = set.intersection(*[k[0] for k in combo])
        for k in combo:
            k[0] -= intsect

        # write to out files:
        # NOTE(review): printb is defined outside this view; presumably it
        # renders the bitmask as a binary string -- confirm.
        filename = args.prefix + "." + printb(bitmask) + ".intersect.txt"
        with open(filename, "w") as ofile:
            print("Writing", filename)
            ctr = 0
            for line in intsect:
                # First column is the number of files sharing the line.
                ofile.write(str(r) + "\t" + line + "\n")
                ctr += 1
        print(ctr, "lines written.")
    r -= 1
def geom_find_group(g, atwts, pr_ax, mom, tt,
        nmax=_DEF.SYMM_MATCH_NMAX,
        tol=_DEF.SYMM_MATCH_TOL,
        dig=_DEF.SYMM_ATWT_ROUND_DIGITS,
        avmax=_DEF.SYMM_AVG_MAX):
    """ [Find all(?) proper rotation axes (n > 1) and reflection planes.]

    .. todo:: Complete geom_find_group docstring INCLUDING NEW HEADER LINE

    WORK IN PROGRESS.  Only linear, single-atom and spherical tops are
    handled at present, and the spherical branch currently stops after
    locating a first reference axis.

    DEPENDS on principal axes and moments being sorted such that:
        I_A <= I_B <= I_C

    Parameters
    ----------
    g
        Molecular geometry.  Converted with ``make_nd_vec`` and reshaped
        into rows of three values, so its length must be a multiple of 3
        (presumably flattened Cartesian coordinates -- confirm).
    atwts
        Atomic weights, converted with ``make_nd_vec``; used to group like
        atoms via ``g_subset``.
    pr_ax
        Principal axes.  Only column 0 is used (linear case); presumably a
        3x3 matrix of column vectors -- confirm against caller.
    mom
        Principal moments.  Not referenced by the current implementation.
    tt
        Top type; compared against ``EnumTopType`` members ``LINEAR``,
        ``ATOM`` and ``SPHERICAL``.
    nmax, tol
        Forwarded to ``geom_check_axis``.  `tol` is also the threshold the
        ``geom_symm_match`` result is compared against for linear tops.
    dig
        Forwarded to ``g_subset``.
    avmax
        Not referenced by the current implementation (it was only used by
        the retired brute-force search).

    Returns
    -------
    tuple or Axis
        * Linear top: ``("D*h", 2)`` if the symmetry match along
          ``pr_ax[:,0]`` is below `tol`, otherwise ``("C*v", 1)``.
        * Single atom: ``("Kh", 1)``.
        * Spherical top: an ``Axis(vector, order, refl)`` namedtuple for the
          first axis found with rotation order >= 2.

    Raises
    ------
    SymmError
        ``SymmError.NOTFOUND`` if no axis of order >= 2 is found for a
        spherical top.
    NotImplementedError
        For any other top type (previously this path failed with a
        ``NameError`` on an undefined result variable).

    Logic flow developed using:
        1) http://symmetry.otterbein.edu/common/images/flowchart.pdf
            Accessed 6 Mar 2015 (flow chart)
        2) Largent et al. J Comp Chem 22: 1637-1642 (2012).
            doi: 10.1002/jcc.22995

    Helpful examples and descriptions of point groups from:
        1) Wilson, Decius & Cross. "Molecular Vibrations." New York:
            Dover (1980), pp 82-85.
        2) "Molecular Structures of Organic Compounds -- Symmetry of
            Molecules." Website of Prof. Dr. Stefan Immel, TU Darmstadt.
            http://csi.chemie.tu-darmstadt.de/ak/immel/script/
            redirect.cgi?filename=http://csi.chemie.tu-darmstadt.de/ak/
            immel/tutorials/symmetry/index7.html. Accessed 6 Mar 2015.

    Rotational symmetry numbers defined per:
        Irikura, K. K. "Thermochemistry: Appendix B: Essential Statistical
        Thermodynamics." Table II. NIST Computational Chemistry Comparison
        & Benchmark Database. Online resource: http://cccbdb.nist.gov/
        thermo.asp. Accessed 6 Mar 2015.

    """
    #!TODO: Implement principal axes threshold checking to tell if a
    #  not-strictly spherical top is far enough from spherical to ignore
    #  looking for cubic groups.  Ugh.  Doesn't find the reflection planes
    #  in NH3. Going to have to explicitly deal with top type, since axes
    #  *must* be principal axes of the molecule, and off-principal axes
    #  will definitely never be symmetry elements.
    #  If asymmetric, only do pr_ax
    #  If symmetric, do the unique pr_ax and projections of atoms and
    #   midpoints normal to that axis
    #  If spherical, do everything, since every axis is inertially valid.
    #  If linear, pretty much just checking for inversion center to tell
    #   between C*v and D*h

    # Imports
    import numpy as np
    from scipy import linalg as spla
    from ..const import PRM, EnumTopType as ETT
    from itertools import combinations as nCr
    from collections import namedtuple
    from ..error import SymmError

    # Define the Axis class
    Axis = namedtuple('Axis', 'vector order refl')

    # First, look for linear; exploit the top type, as linear should never
    #  be mis-attributed
    if tt == ETT.LINEAR:
        # Check for plane of symmetry; if there, D*h; if not, C*v
        #!TODO: Once symmetry element reporting structure is established,
        #  revise here to report the molecular axis as the symmetry element.
        if geom_symm_match(g, atwts, pr_ax[:,0], 0., True) < tol:
            # Has symmetry plane; D*h
            return "D*h", 2
        # No symmetry plane; C*v
        return "C*v", 1

    # Then, check for an atom
    if tt == ETT.ATOM:
        return "Kh", 1

    # Generally, trust that the top classification is going to be more
    #  rigorous than the symmetry identification.  Thus, Spherical
    #  will almost certainly indicate a cubic group; Symmetrical, whether
    #  oblate or prolate, will indicate either a cubic group or a non-cubic
    #  with a principal rotation axis of order > 2; and Asymmetrical leaves
    #  room for any group to be found.
    # (move much of this comment to the docstring once it's working)

    # Vectorize the geometry and atwts
    g = make_nd_vec(g, nd=None, t=np.float64, norm=False)
    atwts = make_nd_vec(atwts, nd=None, t=np.float64, norm=False)

    # Also make coordinate-split geometry (one row vector per atom)
    g_coord = g.reshape((g.shape[0] // 3, 3))

    def _first_rot_axis(candidates):
        """ Return the first candidate of rotation order >= 2 as an Axis,
        or None if the candidates are exhausted without finding one.
        """
        for vec in candidates:
            # Only check if norm is large enough to define a direction
            if spla.norm(vec) > PRM.ZERO_VEC_TOL:
                order, refl = geom_check_axis(g, atwts, vec, nmax, tol)
                if order >= 2:
                    return Axis(vector=vec, order=order, refl=refl)
        return None

    def _like_atom_midpoints():
        """ Lazily yield the (un-normalized) midpoint direction of every
        pair of atoms that share an atomic weight.
        """
        for atwt in np.unique(atwts):
            # Retrieve the sub-geometry for this weight
            g_atwt = g_subset(g, atwts, atwt, dig)

            # Only have axes to generate if more than one atom matched
            if g_atwt.shape[0] > 3:
                # Reshape to grouped coordinates (row vectors)
                g_atwt = g_atwt.reshape((g_atwt.shape[0] // 3, 3))

                # Iterate over all unique index tuples of pairs
                for tup in nCr(range(g_atwt.shape[0]), 2):
                    # Just vector-add the appropriate atomic
                    #  coordinates; no need to normalize.
                    yield np.add(*g_atwt[tup,:])

    # Handle Spherical case
    if tt == ETT.SPHERICAL:
        # Know for a fact that it should be a cubic group. Start looking at
        #  atom-wise vectors until an order > 1 axis is found.
        ref_Axis = _first_rot_axis(g_coord)

        # Nothing found along atom positions (could happen, e.g. in C60
        #  buckyball): search midpoints between like atoms, again until an
        #  order > 1 axis is found.  The midpoints are only generated when
        #  this fallback is actually needed.
        if ref_Axis is None:
            ref_Axis = _first_rot_axis(_like_atom_midpoints())

        # If nothing found here either, raise exception
        if ref_Axis is None:
            raise SymmError(SymmError.NOTFOUND,
                    "Cubic point group not found in spherical top " +
                    "molecule.", "geom_find_group()")

        #!RESUME: Search for other axes depending on the order of the axis
        #  found.
        return ref_Axis

    # NOTE: An earlier brute-force implementation (from before the top type
    #  was trusted) used to follow here as commented-out code.  It collected
    #  the principal axes, every atom displacement and the like-atom
    #  midpoints of up to `avmax` atoms; dropped collinear and too-short
    #  vectors; and tested each survivor for rotation order and reflection.
    #  It is available in version control.  Still to do per those notes:
    #  test rotations for 2x-order impropers, test for an inversion center,
    #  then search the point group catalog and assign.

    # Symmetric and asymmetric tops are not handled yet.  Fail explicitly
    #  rather than with a NameError on a result that was never built.
    raise NotImplementedError(
            "Point group search is only implemented for linear, atomic and "
            "spherical tops.")
def party_combo(iterable):
    """Print every unordered pair of items from *iterable*, one per line.

    Pairs are produced by ``nCr(iterable, 2)`` in input order.  Both members
    of a pair are stripped of surrounding whitespace and printed separated
    by a single space.  Fewer than two items produce no output.

    Returns None; the pairs are written to standard output.
    """
    for first, second in nCr(iterable, 2):
        # A single pre-joined argument prints identically whether ``print``
        # is the Python 2 statement or the Python 3 function.
        print(first.strip() + " " + second.strip())
# Colour tag written to the heatmap files, keyed by the number of input
# files that share a block of lines.
color = {
    2: 'yellow',
    3: 'green',
    4: 'blue',
    5: 'orange',
    6: 'purple',
    7: 'red',
}

# NOTE(review): this section looks truncated in the reviewed excerpt --
# `ofile` and `count` are never consumed and `r` is never decremented in the
# visible code.  The visible logic is reproduced unchanged (apart from
# closing the input files); confirm the remainder of the loop against the
# full script.

# Read every input file into a set of whitespace-stripped lines, closing
# each handle as soon as it has been consumed.
files = []
for f in args.files:
    with open(f) as handle:
        files.append({line.strip() for line in handle})

# One mutable record per input: [symbol, remaining line set, position].
# NOTE(review): `syms` and `pos` are defined outside this view; presumably
# they are parallel to args.files -- verify.
files = [list(k) for k in zip(syms, files, pos)]

r = len(files)
count = 0
with open('segdup.txt', 'w') as ofile:
    while r > 1:
        with open('segdup' + str(r) + '.txt', 'w') as ocase, \
                open('heatmap' + str(r) + '.txt', 'w') as hcase:
            for combo in nCr(files, r):
                # Lines common to every member of this combination
                intsect = set.intersection(*[k[1] for k in combo])
                nos = len(intsect)

                # write to out files: one link row per pair in the combo,
                # each side spanning [position, position + nos]
                for first, second in nCr(combo, 2):
                    ocase.write("\t".join([
                        first[0], str(first[2]), str(first[2] + nos),
                        second[0], str(second[2]), str(second[2] + nos)
                    ]) + "\n")
                    count += 1

                # increment positions
                for k in combo:
                    # Shared lines are consumed so that smaller combinations
                    # do not report them again.
                    k[1] -= intsect
                    start = k[2]
                    end = start + nos
                    hcase.write("\t".join([
                        k[0], str(start), str(end), 'color=' + color[r]
                    ]) + "\n")