Beispiel #1
0
# Command-line interface: one or more input files plus a mandatory output prefix.
parser = argparse.ArgumentParser( description="Collate databases into mutually exclusive sets. Please provide databases in correct order." )
parser.add_argument( 'files', metavar="file", type = str, nargs="+", help='name of file(s) to be combined, you may supply one or more.' )
parser.add_argument( '-p', dest='prefix', metavar="prefix", type = str, help='Prefix of output name', required=True )

args = parser.parse_args()

# Load every input file as a set of whitespace-stripped lines and pair it with
# a unique single-bit flag.  The files are walked in reverse order, so the
# LAST file given on the command line receives bit 0.
files = []
for fileno, f in enumerate(reversed(args.files)):
	# Context manager so the handle is closed as soon as the set is built.
	with open(f) as ifile:
		fset = set(line.strip() for line in ifile)
	files.append([fset, 1 << fileno])

# Total number of input files (argparse's nargs="+" guarantees at least one).
fileno = len(files)

# Work from the largest combinations down to single files.  Lines found in an
# intersection are removed from every participating set, so each line is
# written exactly once, for the largest group of files that shares it.
r = fileno
while r>0:
	for combo in nCr(files, r):
		# OR together (as a sum of distinct bits) the flags of the files in
		# this combination; the result labels the output file.
		bitmask = sum(k[1] for k in combo)
		intsect = set.intersection(*[k[0] for k in combo])

		# In-place difference: mutates the sets stored in `files`.
		for k in combo:
			k[0] -= intsect

		# write to out files:
		filename = args.prefix + "." + printb(bitmask) + ".intersect.txt"
		with open(filename, "w") as ofile:
			print("Writing", filename)
			# Each output row is "<number of files sharing it>\t<line>".
			ofile.writelines(str(r) + "\t" + line + "\n" for line in intsect)
			ctr = len(intsect)
			print(ctr, "lines written.")

	r -= 1
Beispiel #2
0
def geom_find_group(g, atwts, pr_ax, mom, tt,
        nmax=_DEF.SYMM_MATCH_NMAX,
        tol=_DEF.SYMM_MATCH_TOL,
        dig=_DEF.SYMM_ATWT_ROUND_DIGITS,
        avmax=_DEF.SYMM_AVG_MAX):
    """ [Find all(?) proper rotation axes (n > 1) and reflection planes.]

    .. todo:: Complete geom_find_axes docstring INCLUDING NEW HEADER LINE

    WORK IN PROGRESS: only linear, atomic and spherical tops are handled.
    The return type currently differs between those cases (see Returns).

    DEPENDS on principal axes and moments being sorted such that:
        I_A <= I_B <= I_C

    Parameters
    ----------
    g
        Molecular geometry.  It is flattened with ``make_nd_vec`` and then
        read as consecutive (x, y, z) triples, one per atom.
    atwts
        Atomic weights; flattened with ``make_nd_vec``.  Presumably one
        entry per atom -- TODO confirm against ``g_subset``.
    pr_ax
        2-D array of principal axes.  Only the first column
        (``pr_ax[:,0]``) is used here, for the linear-top reflection test.
    mom
        Principal moments.  Not used by the logic implemented so far.
    tt
        Top type; compared against ``EnumTopType`` members ``LINEAR``,
        ``ATOM`` and ``SPHERICAL``.
    nmax
        Passed through to ``geom_check_axis`` (presumably the maximum
        rotation order to test -- TODO confirm).
    tol
        Tolerance; passed to ``geom_check_axis`` and used as the threshold
        on the ``geom_symm_match`` result for the linear-top test.
    dig
        Passed through to ``g_subset`` (presumably rounding digits used
        when matching atomic weights -- TODO confirm).
    avmax
        Not used by the logic implemented so far.

    Returns
    -------
    tuple or Axis
        * Linear top: ``("D*h", 2)`` if the reflection test passes,
          otherwise ``("C*v", 1)``.
        * Atom: ``("Kh", 1)``.
        * Spherical top: an ``Axis(vector, order, refl)`` namedtuple for
          the first axis of order >= 2 that is found.

    Raises
    ------
    SymmError
        ``SymmError.NOTFOUND`` if no axis of order >= 2 is found for a
        spherical top.
    NotImplementedError
        For any other top type; the search is not implemented yet.

    Logic flow developed using:
        1) http://symmetry.otterbein.edu/common/images/flowchart.pdf
            Accessed 6 Mar 2015 (flow chart)
        2) Largent et al. J Comp Chem 22: 1637-1642 (2012).
            doi: 10.1002/jcc.22995

    Helpful examples and descriptions of point groups from:
        1) Wilson, Decius & Cross. "Molecular Vibrations." New York:
            Dover (1980), pp 82-85.
        2) "Molecular Structures of Organic Compounds -- Symmetry of
            Molecules." Website of Prof. Dr. Stefan Immel, TU Darmstadt.
            http://csi.chemie.tu-darmstadt.de/ak/immel/script/
            redirect.cgi?filename=http://csi.chemie.tu-darmstadt.de/ak/
            immel/tutorials/symmetry/index7.html. Accessed 6 Mar 2015.

    Rotational symmetry numbers defined per:
        Irikura, K. K. "Thermochemistry: Appendix B: Essential Statistical
        Thermodynamics." Table II. NIST Computational Chemistry Comparison
        & Benchmark Database. Online resource: http://cccbdb.nist.gov/
        thermo.asp. Accessed 6 Mar 2015.

    """
    #!TODO: Implement principal axes threshold checking to tell if a
    #  not-strictly spherical top is far enough from spherical to ignore
    #  looking for cubic groups.  Ugh. Doesn't find the reflection planes
    #  in NH3. Going to have to explicitly deal with top type, since axes
    #  *must* be principal axes of the molecule, and off-principal axes
    #  will definitely never be symmetry elements.
    #  If asymmetric, only do pr_ax
    #  If symmetric, do the unique pr_ax and projections of atoms and
    #   midpoints normal to that axis
    #  If spherical, do everything, since every axis is inertially valid.
    #  If linear, pretty much just checking for inversion center to tell
    #   between C*v and D*h

    # Imports
    import numpy as np
    from scipy import linalg as spla
    from ..const import PRM, EnumTopType as ETT
    from itertools import combinations as nCr
    from collections import namedtuple
    from ..error import SymmError

    # Lightweight record for a candidate symmetry axis
    Axis = namedtuple('Axis', 'vector order refl')

    # First, look for linear; exploit the top type, as linear should never
    #  be mis-attributed
    if tt == ETT.LINEAR:
        # Check for plane of symmetry; if there, D*h; if not, C*v
        #!TODO: Once symmetry element reporting structure is established,
        #  revise here to report the molecular axis as the symmetry element.
        if geom_symm_match(g, atwts, pr_ax[:,0], 0., True) < tol:
            # Has symmetry plane; D*h
            return "D*h", 2
        ## end if

        # No symmetry plane; C*v
        return "C*v", 1
    ## end if

    # Then, check for an atom
    if tt == ETT.ATOM:
        return "Kh", 1
    ## end if

    # Generally, trust that the top classification is going to be more
    #  rigorous than the symmetry identification.  Thus, Spherical
    #  will almost certainly indicate a cubic group; Symmetrical, whether
    #  oblate or prolate, will indicate either a cubic group or a non-cubic
    #  with a principal rotation axis of order > 2; and Asymmetrical leaves
    #  room for any group to be found.
    # (move much of this comment to the docstring once it's working)

    # Vectorize the geometry and atwts
    g = make_nd_vec(g, nd=None, t=np.float64, norm=False)
    atwts = make_nd_vec(atwts, nd=None, t=np.float64, norm=False)

    # Also make coordinate-split geometry (one row of x, y, z per atom)
    g_coord = g.reshape((g.shape[0] // 3, 3))

    # Handle Spherical case
    if tt == ETT.SPHERICAL:
        # Build the list of candidate axes through the midpoints of every
        #  pair of atoms sharing the same atomic weight
        ax_midpts = []
        for atwt in np.unique(atwts):
            # Retrieve the sub-geometry for this atomic weight
            g_atwt = g_subset(g, atwts, atwt, dig)

            # More than three coordinates means more than one atom, which
            #  is the minimum needed to form a pair
            if g_atwt.shape[0] > 3:
                # Reshape to grouped coordinates (row vectors)
                g_atwt = g_atwt.reshape((g_atwt.shape[0] // 3, 3))

                # Iterate over all unique index tuples of pairs
                for tup in nCr(range(g_atwt.shape[0]), 2):
                    # The sum of the two position vectors is parallel to
                    #  the midpoint vector; no need to normalize.
                    ax_midpts.append(np.add(*g_atwt[tup,:]))
                ## next tup
            ## end if more than one matched atom
        ## next atwt, to index all midpoint axes in the system

        # Convert to 2-D array
        ax_midpts = np.array(ax_midpts)

        # Know for a fact that it should be a cubic group. Start looking at
        #  atom-wise vectors until an order > 1 axis is found.
        order = i = 0
        while order < 2 and i < g_coord.shape[0]:
            # Candidate axis: the position vector of atom i
            ax = g_coord[i,:]

            # Only check if norm is large enough to define a direction
            if spla.norm(ax) > PRM.ZERO_VEC_TOL:
                order, refl = geom_check_axis(g, atwts, ax, nmax, tol)
            ## end if

            i += 1
        ## loop

        # If nothing was found along the atom positions (could happen,
        #  e.g., in C60 buckyball), search the midpoints between like
        #  atoms, again until an order > 1 axis is found.
        if order < 2:
            i = 0
            while order < 2 and i < len(ax_midpts):
                # Candidate axis: the i-th like-atom midpoint direction
                ax = ax_midpts[i,:]

                # Only check if norm is large enough to define a direction
                if spla.norm(ax) > PRM.ZERO_VEC_TOL:
                    order, refl = geom_check_axis(g, atwts, ax, nmax, tol)
                ## end if

                i += 1
            ## loop

            # If nothing found here either, raise exception
            if order < 2:
                raise SymmError(SymmError.NOTFOUND,
                        "Cubic point group not found in spherical top " +
                        "molecule.", "geom_find_group()")
            ## end if
        ## end if

        # `ax`, `order` and `refl` still hold the values from the loop
        #  iteration that found the axis; store them as the reference Axis.
        ref_Axis = Axis(vector=ax, order=order, refl=refl)

        #!RESUME: Search for other axes depending on the order of the axis found.
        return ref_Axis
    ## end if spherical

    # NOTE: Two earlier approaches used to live here as commented-out code
    #  and have been removed:
    #   1) testing just the three principal axes (from before the top type
    #      was trusted); and
    #   2) a brute-force search over principal axes, atom position vectors
    #      and like-atom midpoints of increasing order (up to `avmax`),
    #      with collinear and near-zero axes culled before testing each
    #      for rotations and reflections.
    #  Still to do per those notes: test rotations for 2x-order impropers,
    #  test for an inversion center, then look up the point group.

    # Symmetric and asymmetric tops are not handled yet.  Fail explicitly
    #  rather than with a NameError from returning an undefined variable.
    raise NotImplementedError(
            "geom_find_group() is only implemented for linear, atomic, " +
            "and spherical tops.")
Beispiel #3
0
def party_combo(iterable):
    """Print every unordered pair of entries from *iterable*, one per line.

    Each entry is stripped of surrounding whitespace and the two members of
    a pair are separated by a single space.  Pairs are generated by ``nCr``
    with a group size of 2; ``nCr`` is expected to be
    ``itertools.combinations``, brought into scope elsewhere in the file.

    :param iterable: iterable of strings (anything with a ``strip`` method).
    :returns: None; the pairs are written to standard output.
    """
    for first, second in nCr(iterable, 2):
        # A single pre-formatted argument makes print() behave identically
        # as a Python 2 statement and as a Python 3 function.
        print("%s %s" % (first.strip(), second.strip()))
Beispiel #4
0
# Colour label written to the heatmap output, keyed by the number of files
# that share a segment.  NOTE(review): only sizes 2-7 are covered, so more
# than seven input files would raise KeyError on lookup.
color = { 2: 'yellow', 3: 'green', 4: 'blue', 5: 'orange', 6: 'purple', 7: 'red' }

# Read every input file into a set of whitespace-stripped lines.
files = []
for f in args.files:
	# Context manager so the handle is closed as soon as the set is built.
	with open(f) as ifile:
		fset = set(line.strip() for line in ifile)
	files.append(fset)

# Bundle each set with its symbol and position as a mutable
# [symbol, line set, position] record; the set is updated in place later.
files = [ list(k) for k in zip( syms, files, pos) ]

# Start with the combination of all files; `count` tallies the rows written.
r = len(files)
count = 0
with open('segdup.txt', 'w') as ofile:
	while r>1:
		with open('segdup'+str(r)+'.txt', 'w') as ocase:
			with open('heatmap'+str(r)+'.txt', 'w') as hcase:
				for combo in nCr(files, r):
					intsect = set.intersection(*[k[1] for k in combo])
					nos = len(intsect)

					# write to out files:
					for pair in nCr(combo, 2):
						first, second = pair
						ocase.write("\t".join([ first[0], str(first[2]), str(first[2] + nos), second[0], str(second[2]), str(second[2] + nos)]) + "\n")
						count += 1
										
					# increment positions
					for k in combo: 
						k[1] -= intsect
						start =	k[2]
						end = start + nos
						hcase.write("\t".join([ k[0], str(start), str(end), 'color=' + color[r]]) + "\n")