예제 #1
0
def _get_solute_index(fpl_obj):
    """
    Find the index, within ``fpl_obj.system.molecules``, of the molecule that
    best matches the solute.

    The coordinates of the last frame held in ``fpl_obj.data`` are first
    copied onto ``fpl_obj.system.atoms`` (this mutates the system in place).

    **Parameters**

        fpl_obj: *object*
            Object exposing ``data``, ``system`` and ``solute`` attributes.

    **Returns**

        index_of_solute: *int or None*
            ``None`` if any copied coordinate is NaN.  ``0`` if no solute is
            set.  Otherwise the index into ``system.molecules`` of the
            molecule with the smallest ``geometry.motion_per_frame`` value
            after a procrustes fit against the solute structure.

    **Raises**

        ValueError: if a solute is set but no molecule in the system has the
            same number of atoms as the solute.
    """
    try:
        xyz = fpl_obj.data[-1]
    except TypeError:
        # data is not indexable; fall back to its atoms as a single frame
        xyz = [fpl_obj.data.atoms]

    system = fpl_obj.system
    ## Store end of last LAMMPs simulation to system.atoms variable
    for a, b in zip(system.atoms, xyz[-1]):
        a.x, a.y, a.z = b.x, b.y, b.z
        if any(np.isnan(v) for v in (a.x, a.y, a.z)):
            return None

    if not fpl_obj.solute:
        return 0

    ## Find the molecule that is closest to the reference solute structure
    m_solute = structures.Molecule(fpl_constants.cml_dir + fpl_obj.solute,
                                   test_charges=False,
                                   allow_errors=True)
    index_of_solute = None
    smallest_diff = None
    for index, molec in enumerate(system.molecules):
        # NOTE, ORDER MATTERS! As procrustes WILL change the atomic positions of the
        # second list of atoms to best match the first.  We don't care if m_solute
        # changes, but if everything else overlaps with m_solute then we have an issue.
        chk = [molec.atoms, m_solute.atoms]
        if len(chk[0]) != len(chk[1]):
            continue
        geometry.procrustes(chk)
        diff = geometry.motion_per_frame(chk)[-1]
        # Track the position in system.molecules itself: molecules skipped
        # above must still count towards the returned index.  The strict
        # comparison keeps the first molecule on ties.
        if smallest_diff is None or diff < smallest_diff:
            index_of_solute, smallest_diff = index, diff

    if index_of_solute is None:
        raise ValueError(
            "No molecule in the system has the same number of atoms as the solute %s"
            % fpl_obj.solute)

    return index_of_solute
예제 #2
0
def makeMoleculePattern(mol):
	'''Build a Species pattern holding a single bare molecule.

	The molecule is created from the ``name`` and ``idx`` attributes of
	*mol* and carries no components.
	'''
	molecule = st.Molecule(mol.name, mol.idx)
	pattern = st.Species()
	pattern.addMolecule(molecule)
	return pattern
예제 #3
0
def makeUnboundPattern(mol,comp):
	'''Build a Species pattern for a molecule with one unbound component.

	The component is created from ``comp.name``/``comp.idx`` with empty
	bond and state lists and attached to a molecule created from
	``mol.name``/``mol.idx``.
	'''
	component = st.Component(comp.name, comp.idx, [], [])
	molecule = st.Molecule(mol.name, mol.idx)
	molecule.addComponent(component)

	pattern = st.Species()
	pattern.addMolecule(molecule)
	return pattern
예제 #4
0
    def generate_system(self, halide, cation, ion="Pb"):
        """
        Generate a system of solute + solvents using Packmol.

        The solvent structure is read from ``<solvent_name>.cml`` inside
        ``fpl_constants.cml_dir``; the lower-case file name is tried first
        and the upper-case one second.  The packed system is stored on
        ``self.system``.

        **Parameters**

            halide:
                Passed through to ``fpl_utils.generate_lead_halide_cation``.
            cation:
                Passed through to ``fpl_utils.generate_lead_halide_cation``.
            ion: *str, optional*
                Passed through to ``fpl_utils.generate_lead_halide_cation``.

        **Returns**

            None

        **Raises**

            Exception: if neither the lower-case nor the upper-case solvent
                file exists.
        """
        ## Generate empty system
        system = structures.System(box_size=(25, 25, 25), name=self.run_name)

        ## Get structures for solvent and solute
        # Check if upper case or lower case file exists and use the one that does
        for stem in (self.solvent_name.lower(), self.solvent_name.upper()):
            solvent_path = fpl_constants.cml_dir + stem + ".cml"
            if os.path.exists(solvent_path):
                break
        else:
            raise Exception(
                "Solvent file %s.cml does not exist in %s.  Ensure the file exists and re-run."
                % (self.solvent_name, fpl_constants.cml_dir))
        solvent = structures.Molecule(
            solvent_path,
            extra_parameters=self.extra,
            allow_errors=True,
            default_angles=fpl_constants.default_angles)

        if self.solute is not None:
            fpl_utils.generate_lead_halide_cation(halide, cation, ion=ion)
            solute = structures.Molecule(
                fpl_constants.cml_dir + self.solute,
                test_charges=False,
                allow_errors=True,
                default_angles=fpl_constants.default_angles)
            system.add(solute)

        ## Pack the system
        system.packmol((solvent, ), (1, ),
                       fpl_constants.solvent[self.solvent_name]["density"],
                       self.seed)

        self.system = system
예제 #5
0
def makeBondPattern(mc1,mc2):
	'''Build a Species pattern of two molecules joined by bond '1'.

	Each argument is a [Molecule, Component] pair.  For each pair a fresh
	component (empty bond/state lists) is given bond label '1' and attached
	to a fresh molecule; both molecules are then added to one Species.
	'''
	def bondedMolecule(pair):
		# Same construction order as before: component, bond, molecule, attach
		molecule, component = pair
		boundComponent = st.Component(component.name, component.idx, [], [])
		boundComponent.addBond('1')
		boundMolecule = st.Molecule(molecule.name, molecule.idx)
		boundMolecule.addComponent(boundComponent)
		return boundMolecule

	first = bondedMolecule(mc1)
	second = bondedMolecule(mc2)

	pattern = st.Species()
	pattern.addMolecule(first)
	pattern.addMolecule(second)
	return pattern
예제 #6
0
def makeStatePattern(mol,comp,state):
	'''Build a Species pattern for a molecule whose component is in *state*.

	The component is created from ``comp.name``/``comp.idx`` with empty
	bond and state lists; *state* is added to it and made its active state.
	'''
	component = st.Component(comp.name, comp.idx, [], [])
	component.addState(state)
	component.setActiveState(state)

	molecule = st.Molecule(mol.name, mol.idx)
	molecule.addComponent(component)

	pattern = st.Species()
	pattern.addMolecule(molecule)
	return pattern
예제 #7
0
def parseMolecules(molecules):
    '''
    Parses an XML molecule section.

    A molecule is created from the ``name`` and ``id`` attributes of
    *molecules*; one component is added for every child of its
    ``ListOfComponentTypes`` element, if that element is present.

    Returns: a molecule structure
    '''
    mol = st.Molecule(molecules.get('name'), molecules.get('id'))
    components = molecules.find(
        './/{http://www.sbml.org/sbml/level3}ListOfComponentTypes')
    # Test against None explicitly: find() returns None when nothing matches
    if components is not None:
        # Iterate the element directly; Element.getchildren() is deprecated
        for component in components:
            comp = st.Component(component.get('name'), component.get('id'))
            mol.addComponent(comp)
    return mol
예제 #8
0
def read_seed(path="./seed", extra_parameters=None):
    """
    Read in all cml files from the seed directory.

    **Parameters**

        path: *str, optional*
            A path to the seed directory.
        extra_parameters: *dict, optional*
            Additional parameters to add to OPLSAA.  Defaults to an empty
            dictionary.

    **Returns**

        molecules_A: *list, list, molecules*
            A list of molecules from the seed directory
        molecules_B: *list, molecules*
            A list of molecules from the seed directory.  In this case,
            we merge child molecules into one.

    **Raises**

        Exception: if the seed directory does not exist or holds no cml
            files.
    """
    # A fresh dict per call avoids sharing one mutable default between calls
    if extra_parameters is None:
        extra_parameters = {}

    if path.endswith("/"):
        path = path[:-1]
    if not os.path.exists(path):
        raise Exception("Unable to find seed directory")

    molecules_A = [
        files.read_cml(os.path.join(path, fptr),
                       return_molecules=True,
                       allow_errors=True,
                       test_charges=False,
                       extra_parameters=extra_parameters)
        for fptr in os.listdir(path)
        if fptr.endswith(".cml")
    ]

    if not molecules_A:
        raise Exception("Seed directory is empty")

    # Merge the child molecules of every seed file into a single molecule
    molecules_B = []
    for seed in molecules_A:
        atoms, bonds, angles, dihedrals = [], [], [], []
        for mol in seed:
            atoms += mol.atoms
            bonds += mol.bonds
            angles += mol.angles
            dihedrals += mol.dihedrals
        molecules_B.append(structures.Molecule(atoms, bonds, angles,
                                               dihedrals))

    return molecules_A, molecules_B
예제 #9
0
def generate_lead_halide(halide, ion="Pb"):
    """
    Build a molecule made of a central ion surrounded by halide atoms.

    **Parameters**

        halide: *str or list, str*
            Either a single halide element, which is used three times, or a
            sequence of halide elements.
        ion: *str, optional*
            Element of the central atom, placed at the origin.

    **Returns**

        PbX: *molecule*
            The assembled molecule.
    """
    PbX = structures.Molecule([structures.Atom(ion, 0, 0, 0)])
    if isinstance(halide, str):
        halide = [halide, halide, halide]

    def vdw(y):
        return PERIODIC_TABLE[units.elem_s2i(y)]['vdw_r']

    # The rotation is the same for every halide, so build it once
    R = geometry.rotation_matrix([0, 0, 1], 120, units="deg")
    for x in halide:
        v = vdw(x)
        PbX.atoms.append(structures.Atom(x, v, 0, 0.5 * v))
        # Rotate after each addition so the next halide is placed at the
        # same coordinates relative to the already-rotated molecule
        PbX.rotate(R)
    return PbX
예제 #10
0
def merge(reactant1, reactant2, r1, r2, translator, outputFlag=False):
    '''
    Receives two species reactant1 and reactant2, and their intersection points
    r1 and r2, and creates a new complex that is the union.

    The translator entries of both reactants (when present) are appended to a
    new species, two molecules carrying components r1 and r2 are linked with
    a fresh bond number, and the translator entries keyed by the molecule
    names are extended with the new molecules.  Nothing is returned; the
    only lasting effect is the mutation of ``translator``.

    NOTE(review): ``binding1`` and ``binding2`` are neither parameters nor
    locals of this function.  Unless they exist as module-level names this
    raises NameError -- confirm what they are meant to be.
    '''
    species = st.Species()
    if reactant1 in translator:
        species.append(translator[reactant1])
    if reactant2 in translator:
        species.append(translator[reactant2])
    if outputFlag:
        print '-----------', species, reactant1, reactant2, reactant2 in translator
    # Next unused bond label for the new link between the two molecules
    rnd = max(species.getBondNumbers()) + 1
    molecule1 = st.Molecule(binding1)
    molecule2 = st.Molecule(binding2)
    component1 = st.Component(r1)
    component2 = st.Component(r2)
    component1.addBond(str(rnd))
    component2.addBond(str(rnd))
    molecule1.addComponent(component1)
    molecule2.addComponent(component2)
    species.addMolecule(molecule1, True, 1)
    # When both binding names are equal the second molecule is added with
    # counter 2 instead of 1
    counter = 2 if binding1 == binding2 else 1
    species.addMolecule(molecule2, True, counter)

    ####TODO: update the rawDAtabase with the m1m2 information
    # Only translator entries that already exist are extended here
    if molecule1.name in translator:
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule1))
        translator[molecule1.name].extend(sp)
        translator[molecule1.name].reset()

    if molecule2.name in translator:
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule2))
        translator[molecule2.name].extend(sp)
        translator[molecule2.name].reset()
예제 #11
0
def createMolecule(molecule, bonds):
    '''
    Build a molecule structure from an XML molecule element.

    One component is created for every child of the element's
    ``ListOfComponents`` (if present).  A ``numberOfBonds`` of '+' or '?' is
    added as the bond itself; any other value except '0' is resolved through
    ``findBond``.  The ``state`` attribute (empty string when absent) becomes
    the component's only and active state.

    Returns: the molecule and a dictionary mapping every encountered id to
    its name.
    '''
    nameDict = {}
    mol = st.Molecule(molecule.get('name'), molecule.get('id'))
    nameDict[molecule.get('id')] = molecule.get('name')
    listOfComponents = molecule.find(
        './/{http://www.sbml.org/sbml/level3}ListOfComponents')
    # find() returns None when there is no match; test for that explicitly
    if listOfComponents is not None:
        for element in listOfComponents:
            elementId = element.get('id')
            component = st.Component(element.get('name'), elementId)
            nameDict[elementId] = element.get('name')

            numberOfBonds = element.get('numberOfBonds')
            if numberOfBonds in ('+', '?'):
                # Wildcard bond markers are stored verbatim
                component.addBond(numberOfBonds)
            elif numberOfBonds != '0':
                component.addBond(findBond(bonds, elementId))

            state = element.get('state', '')
            component.states.append(state)
            component.activeState = state
            mol.addComponent(component)
    return mol, nameDict
예제 #12
0
def get_test_system(length_in_ang=6.0,
                    number_per_side=3,
                    path_to_unit_cell="/fs/home/hch54/Grad-MCSMRFF"
                    "/PbCl3Cs/unit_cell"):
    """
    Build a cubic test system by tiling a unit cell.

    **Parameters**

        length_in_ang: *float, optional*
            Spacing between neighbouring copies of the unit cell.
        number_per_side: *int, optional*
            Number of copies along each of the three axes.
        path_to_unit_cell: *str, optional*
            Path handed to ``structures.Molecule`` to read the unit cell.

    **Returns**

        test_system: *system*
            System named "test_run" holding ``number_per_side ** 3`` copies
            of the unit cell.
    """
    L = length_in_ang
    N = number_per_side
    dim = L * N + 0.5
    test_system = structures.System(box_size=[dim, dim, dim], name="test_run")
    PbMACl3 = structures.Molecule(path_to_unit_cell,
                                  extra_parameters=extra_Pb,
                                  test_charges=False)

    # Grid positions along one axis; the same offsets are used for x, y and z
    offsets = [(i - 0.5) * L for i in range(N)]
    for x in offsets:
        for y in offsets:
            for z in offsets:
                test_system.add(PbMACl3, x, y, z)

    return test_system
예제 #13
0
def extractTransformations(rules):
    '''
    Goes through the list of rules and extracts, per transformation action,
    its reaction center, context and product atomic patterns.

    Also resolves wildcard bond patterns (keys containing '!+'): each one is
    replaced, in every context set that holds it, by the concrete bonded
    patterns sharing its prefix, and is then deleted from the atomic
    array - js

    Each rule is a (react, product, act, mapp, nameDict) tuple.

    Returns: atomicArray, transformationCenter, transformationContext,
    productElements, actionName, label.  The five lists hold one entry per
    action, in rule order.
    '''
    atomicArray = {}
    transformationCenter = []
    transformationContext = []
    productElements = []
    actionName = []
    label = []

    # Rules are numbered from 1 in the generated action names
    for index, (react, product, act, mapp, nameDict) in enumerate(rules, 1):
        for action in act:
            atomic, reactionCenter, context = extractMolecules(
                action.action, action.site1, action.site2, react)
            atomicArray.update(atomic)

            if action.action in ('Delete', 'Add'):
                # extractMolecules does not extract reaction centers for
                # creation and deletion transformations (it does extract the
                # context correctly), so generate the pattern here
                temp = st.Species()
                temp.addMolecule(st.Molecule(nameDict[action.site1], 1))
                pattern = str(temp)
                atomicArray[pattern] = temp
                if action.action == 'Delete':
                    transformationCenter.append(set([pattern]))
                    productElements.append(set())
                else:
                    transformationCenter.append(set())
                    productElements.append(set([pattern]))
                transformationContext.append(context)
            else:
                transformationCenter.append(reactionCenter)
                transformationContext.append(context)
                # Map the reactant sites to the product side and extract the
                # product patterns from there
                productSites = [
                    getMapping(mapp, action.site1),
                    getMapping(mapp, action.site2)
                ]
                atomic, rc, _ = extractMolecules(action.action,
                                                 productSites[0],
                                                 productSites[1], product)
                productElements.append(rc)
                atomicArray.update(atomic)

            tag = '%i-%s' % (index, action.action)
            actionName.append(tag)
            r = '+'.join([str(x) for x in react])
            p = '+'.join([str(x) for x in product])
            label.append('->'.join([r, p, tag]))

    # resolving bond wildcards
    wildcards = [x for x in atomicArray if '!+' in x]
    bondedpatterns = [
        x for x in atomicArray if '!' in x and x not in wildcards
    ]
    for item in wildcards:
        # Everything before the first '+' identifies the bonded site
        prefix = item[:item.find('+')]
        selected_bondedpatterns = [x for x in bondedpatterns if prefix in x]

        # The context sets are updated in place
        for contextSet in transformationContext:
            if item in contextSet:
                contextSet.update(selected_bondedpatterns)
                contextSet.remove(item)
        del atomicArray[item]

    return atomicArray, transformationCenter, transformationContext, productElements, actionName, label
예제 #14
0
def catalysis(original, dictionary, rawDatabase, catalysisDatabase, translator,
              namingConvention, classification, reactionProperties):
    """
    This method is for reactions of the form A + B -> A' + B

    For every reactant on both sides of ``original`` the first element of
    ``dictionary[reactant]`` found in the matching naming convention gets a
    component/state pair taken from ``catalyze`` and is merged into
    ``translator``.  ``translator`` is mutated in place; nothing is returned.
    ``catalysisDatabase`` is not used by this function.
    """
    result = catalyze(namingConvention[0], namingConvention[1], classification,
                      rawDatabase, translator, reactionProperties)
    k = [min(namingConvention, key=len) in x for x in original[0]]
    k2 = [max(namingConvention, key=len) in x for x in original[1]]
    # NOTE(review): ``and`` on two lists yields k2 whenever k is non-empty
    # (and the empty k otherwise); it is not element-wise.  Kept as is --
    # confirm the intended condition.
    k = k and k2
    keepOrder = any(k)
    sortedResult = [result[0], result[1]] if keepOrder else [result[1], result[0]]
    sortedConvention = [
        namingConvention[0], namingConvention[1]
    ] if keepOrder else [namingConvention[1], namingConvention[0]]

    for reactantGroup, res, conv in zip(original, sortedResult,
                                        sortedConvention):
        for reactant in reactantGroup:
            flag = False
            species = st.Species()

            #make a copy of the original element we are going to modify
            if reactant in translator:
                species = deepcopy(translator[reactant])
            elif sortedConvention[0] in translator:
                species = deepcopy(translator[sortedConvention[0]])

            for element in dictionary[reactant]:
                molecule = st.Molecule(element)

                #here it would be much more precise to have the molecule
                #that is going to be modified instead of just modifying the
                #first thing you find
                if element in conv:
                    component = st.Component(res[0])
                    component.addState(res[1])
                    molecule.addComponent(component, 1)
                    flag = True
                    finalMolecule = molecule
                    #FIXME: the comparison should be done a lil more carefully
                    #to avoid overlap

                    species.addMolecule(molecule, True)
                    if str(species) == '':
                        species.addMolecule(molecule)
                    # Only the first matching element is modified
                    break

            if flag:
                if reactant not in translator:
                    translator[reactant] = species
                else:
                    translator[reactant].extend(species, False)

                if finalMolecule.name in translator:
                    # Only entries holding a single molecule are extended
                    if len(translator[finalMolecule.name].molecules) == 1:
                        sp = st.Species()
                        sp.addMolecule(deepcopy(finalMolecule))
                        translator[finalMolecule.name].extend(sp, False)
                        translator[finalMolecule.name].reset()
                else:
                    sp = st.Species()
                    sp.addMolecule(finalMolecule)
                    translator[finalMolecule.name] = deepcopy(sp)

    if len(original[0]) < len(original[1]):
        rebalance(original, sortedConvention, translator)
예제 #15
0
def getIntersection(reactants,
                    product,
                    dictionary,
                    rawDatabase,
                    translator,
                    synthesisDatabase,
                    originalProductName,
                    outputFlag=False):
    '''
    this method goes through two complexes and tries to check how they
    get together to create a product (e.g. how their components link)
    either by using previous knowledge or by creating a new complex

    Returns a 4-tuple:
      * (extended1, extended2, intersection, []) when findIntersection
        finds a known intersection for the two reactants,
      * (species, [], [], []) when a new bonded species was built,
      * (None, None, None, None) when no free radicals could be obtained or
        the resulting species holds no molecules.

    translator is mutated when a new species is built: the entries keyed by
    the two binding molecule names are created or extended.
    '''
    #global log

    # Shallow copies of the dictionary entries for both reactants
    extended1 = (copy(dictionary[reactants[0]]))
    extended2 = (copy(dictionary[reactants[1]]))
    # A bare string entry is wrapped into a one-element list
    if isinstance(extended1, str):
        extended1 = [extended1]
    if isinstance(extended2, str):
        extended2 = [extended2]
    #if we can find an element in the database that is a subset of
    #union(extended1,extended2) we take it
    intersection = findIntersection(extended1, extended2, synthesisDatabase)
    #otherwise we create it from scratch
    if not intersection:
        r1 = getFreeRadical(extended1, extended2[0], rawDatabase, translator,
                            product, dictionary)
        r2 = getFreeRadical(extended2, extended1[0], rawDatabase, translator,
                            product, dictionary)
        binding1, binding2 = getBindingPoints(extended1, extended2, reactants,
                                              originalProductName[0])
        if not r1 or not r2:
            #print 'Cannot infer how',extended1,'binds to',extended2
            #log['reactions'].append((reactants,product))
            #return None,None,None
            #TODO this section should be activated by a flag instead
            #of being accessed by default
            #print extended1,extended2

            # Fall back: create an intersection for the binding points and
            # retry the free-radical lookup on just those two names
            createIntersection((binding1, binding2), rawDatabase, translator,
                               dictionary)
            r1 = getFreeRadical((binding1, ), binding2, rawDatabase,
                                translator, product, dictionary)
            r2 = getFreeRadical((binding2, ), binding1, rawDatabase,
                                translator, product, dictionary)
            #print 'rrrrrrrrrrr',r1,r2
            if not r1 or not r2:
                return (None, None, None, None)

        ##todo: modify code to allow for carry over free radicals
        #FIXME: we can remove synthesisDatabase easily
        # Start the new species from whatever is already known about the
        # two reactants
        species = st.Species()
        if reactants[0] in translator:
            species.append(translator[reactants[0]])
        if reactants[1] in translator:
            species.append(translator[reactants[1]])
        if outputFlag:
            print '-----------', species, reactants[0], reactants[
                1], reactants[1] in translator
            print '+++', binding1, binding2, r1, r2

        # Next unused bond label, shared by both ends of the new bond
        bondName = max(species.getBondNumbers()) + 1
        molecule1 = st.Molecule(binding1)
        molecule2 = st.Molecule(binding2)
        component1 = st.Component(r1)
        component2 = st.Component(r2)
        component1.addBond(str(bondName))
        component2.addBond(str(bondName))
        molecule1.addComponent(component1)
        molecule2.addComponent(component2)

        if outputFlag:
            print '////////', molecule1, molecule2
        species.addMolecule(molecule1, True, 1)
        # When both binding names are equal the second molecule is added
        # with counter 2 instead of 1
        counter = 2 if binding1 == binding2 else 1
        species.addMolecule(molecule2, True, counter)
        if outputFlag:
            print '\\\\\\', species
        ####TODO: update the rawDAtabase with the m1m2 information
        # Record each binding molecule in the translator under its own name,
        # extending an existing entry or creating a new one
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule1))
        if molecule1.name in translator:
            translator[molecule1.name].extend(sp)
        else:
            translator[molecule1.name] = sp
        translator[molecule1.name].reset()
        sp = st.Species()
        sp.addMolecule(deepcopy(molecule2))
        if molecule2.name in translator:
            translator[molecule2.name].extend(sp)
        else:
            translator[molecule2.name] = sp
        translator[molecule2.name].reset()
        if outputFlag:
            print '||||||||||||||||||||||', translator[molecule2.name]
        #print reactants,product,str(species)
        #print name1,name2,extended1,extended2
        #print {x:str(translator[x]) for x in translator}, translator
        if len(species.molecules) == 0:
            return (None, None, None, None)
        return species, [], [], []
    # A known intersection was found: hand it back with both expansions
    return extended1, extended2, intersection, []
예제 #16
0
def synthesis(original,
              dictionary,
              rawDatabase,
              synthesisDatabase,
              translator,
              outputFlag=False):
    '''
    Translate every species name on both sides of a synthesis reaction.

    ``original`` holds the two sides of the reaction.  For each name the
    outcome of ``findCorrespondence`` decides what is stored:
      * (None, None): a bare single-molecule species is registered (if the
        name is new) and the reaction is appended to the
        ``libsbml2bngl.log`` 'reactions' list,
      * a Species: it is stored under the returned tags if those are new,
      * anything else: the chunk is added to the (new or existing) species
        of that name, and the tags are recorded in ``synthesisDatabase``
        unless already known there or in ``rawDatabase``.

    ``translator`` and ``synthesisDatabase`` are mutated in place.

    Returns: 0, always.
    '''
    for elements in original:
        for sbml_name in elements:
            if outputFlag:
                print('- %s' % (sbml_name,))
            tags, molecules = findCorrespondence(original[0], original[1],
                                                 dictionary, sbml_name,
                                                 rawDatabase,
                                                 synthesisDatabase, translator,
                                                 outputFlag)

            if tags is None and molecules is None:
                tmp = st.Species()
                tmp.addMolecule(st.Molecule(sbml_name))
                if sbml_name not in translator:
                    translator[sbml_name] = tmp
                    #raise InsufficientInformationError
                libsbml2bngl.log['reactions'].append(original)
            #TODO: probably we will need to add a check if there are several ways of defining a reaction
            elif isinstance(molecules, st.Species):
                #FIXME: there should be a better way to check whether i actually want to check or not
                if tags not in translator:
                    translator[tags] = molecules
            else:
                precursors = []
                # Decide once whether this name is new; addChunk does not
                # receive the translator, so its keys cannot change here
                isNew = sbml_name not in translator
                species = st.Species() if isNew else translator[sbml_name]
                species.addChunk(tags, molecules, precursors)
                if isNew:
                    # Pull in what is already known about the other side of
                    # the reaction
                    other = original[
                        0] if original[0] != elements else original[1]
                    for tag in [x for x in other if x in translator]:
                        species.extend(translator[tag])
                translator[sbml_name] = species
                if tags not in synthesisDatabase and tags not in rawDatabase:
                    synthesisDatabase[tags] = tuple(molecules)
    return 0
예제 #17
0
def get_training_set(run_name, use_pickle=True, pickle_file_name=None):
    """
    Assemble the training set (or load it from a pickle) and write it out
    for the simulation called *run_name*.

    When the pickle file is missing, or *use_pickle* is False, every
    sub-folder of ``training_sets`` is read: energies from the orca output,
    forces from the engrad file and bonding from ``system.cml``.  Subsets
    whose results cannot be read, or whose relative energy exceeds 500, are
    left out with a warning.  The accepted structures are written to
    ``training_sets/training_sets.xyz`` and, if *use_pickle* is True, saved
    to the pickle file.  Finally the data is written to
    ``lammps/<run_name>``.

    **Parameters**

        run_name: *str*
            Name of the simulation; used for the output folder and, when the
            data comes from a pickle, as the system name.
        use_pickle: *bool, optional*
            Whether to read from / write to the pickle file.
        pickle_file_name: *str, optional*
            Pickle file to read.  Defaults to
            ``training_sets/training_set.pickle``.

    **Returns**

        system: *system*
            System holding every accepted training structure.
        systems_by_composition: *dict, str, list, molecules*
            Accepted structures grouped by their sorted element composition.

    **Raises**

        Exception: if *pickle_file_name* is given but cannot be used, or if
            atom positions in the orca output and the cml file disagree.
    """
    # Take care of pickle file I/O
    if pickle_file_name is None:
        pfile = "training_sets/training_set.pickle"
    else:
        pfile = pickle_file_name

    system = None
    # If the pickle file does not exist, or use_pickle is False, read the
    # data in from the training_sets folder
    if not os.path.isfile(pfile) or not use_pickle:
        if pickle_file_name is not None:
            raise Exception("Requested file %s, but unable to read it in." %
                            pickle_file_name)

        # Create the size of the box to be 1000 x 100 x 100 to hold your
        # training sets
        system = structures.System(box_size=[1e3, 100.0, 100.0],
                                   name="training_set")
        systems_by_composition = {}

        # Convert force from Ha/Bohr to kcal/mol-Ang
        # NOTE(review): the energy unit here is "kcal" while the energies
        # below use "kcal/mol" -- confirm which one units.convert_energy
        # expects.
        def convert(x):
            return units.convert_dist("Ang", "Bohr",
                                      units.convert_energy("Ha", "kcal", x))

        # For each folder in the training_sets folder lets get the cml file
        # we want and write the energies and forces for that file
        for name in os.listdir("training_sets"):
            # We'll read in any training subset that succeeded and print
            # a warning on those that failed
            try:
                result = orca.read("training_sets/%s/%s.out" % (name, name))
            except IOError:
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            # Parse the force output. In the case of no force found, do not
            # use this set of data
            try:
                forces = orca.engrad_read("training_sets/%s/%s.orca.engrad" %
                                          (name, name),
                                          pos="Ang")[0]
            except (IndexError, IOError):
                print("Warning - Training Subset %s not included as results "
                      "not found..." % name)
                continue

            for a, b in zip(result.atoms, forces):
                a.fx = convert(b.fx)
                a.fy = convert(b.fy)
                a.fz = convert(b.fz)

            # Get the bonding information
            with_bonds = structures.Molecule("training_sets/%s/system.cml" %
                                             name,
                                             extra_parameters=extra_Pb,
                                             test_charges=False)

            # Copy over the forces read in into the system that has the
            # bonding information
            for a, b in zip(with_bonds.atoms, result.atoms):
                a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
                if geometry.dist(a, b) > 1e-4:
                    # sanity check on atom positions
                    raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                    (b.x, b.y, b.z))

            # Rename some things
            with_bonds.energy = result.energy
            with_bonds.name = name

            # Group the structures by their (sorted) element composition
            composition = ' '.join(sorted([a.element for a in result.atoms]))
            systems_by_composition.setdefault(composition,
                                              []).append(with_bonds)

        # Generate (1) xyz file of various systems as different time steps and
        # (2) system to simulate
        xyz_atoms = []
        to_delete = []
        for composition in systems_by_composition:
            subsets = systems_by_composition[composition]
            # Sort so that the lowest energy training subset is first in
            # the system
            subsets.sort(key=lambda s: s.energy)
            baseline_energy = subsets[0].energy
            # Offset the energies by the lowest energy, convert units of
            # the energy
            for j, s in enumerate(subsets):
                s.energy -= baseline_energy
                s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
                # Don't use high-energy systems, because these will not likely
                # be sampled in MD
                if s.energy > 500.0:
                    to_delete.append([composition, j])
                    continue
                # For testing purposes, output
                print("DEBUG: %s %s" % (s.name, s.energy))
                xyz_atoms.append(s.atoms)
                system.add(s, len(system.molecules) * 1000.0)

        # Delete the subsets that we aren't actually using due to energy
        # being too high.  Highest list index first, so that the remaining
        # indices stay valid while deleting.
        to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
        for d1, d2 in to_delete:
            print("Warning - Training Subset %s not included as energy is too "
                  "high..." % systems_by_composition[d1][d2].name)
            del systems_by_composition[d1][d2]

        # Make the box just a little bigger (100) so that we can fit all
        # our systems
        system.xhi = len(system.molecules) * 1000.0 + 100.0

        # Write all of the states we are using to training_sets.xyz
        if not os.path.isdir("training_sets"):
            os.mkdir("training_sets")
        os.chdir("training_sets")
        try:
            files.write_xyz(xyz_atoms, 'training_sets')
        finally:
            # Always return to the starting directory, even on failure
            os.chdir("../")

        # Generate our pickle file if desired
        if use_pickle:
            print("Saving pickle file %s..." % pfile)
            with open(pfile, "wb") as fptr:
                pickle.dump([system, systems_by_composition], fptr)

    # If use_pickle is true AND the pickle file exists, then we can just
    # read it in
    if system is None and use_pickle:
        print("Reading pickle file %s..." % pfile)
        # NOTE: pickle.load can execute arbitrary code; only load pickle
        # files that come from a trusted source.
        with open(pfile, "rb") as fptr:
            system, systems_by_composition = pickle.load(fptr)
        system.name = run_name
    elif system is None:
        raise Exception("Requested file %s, but unable to read it in." % pfile)

    # Now we have the data, save it to files for this simulation of "run_name"
    # and return parameters
    if not os.path.isdir("lammps"):
        os.mkdir("lammps")
    if not os.path.isdir("lammps/%s" % run_name):
        os.mkdir("lammps/%s" % run_name)
    os.chdir("lammps/%s" % run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        os.chdir("../../")

    return system, systems_by_composition
예제 #18
0
def createBindingRBM(element, translator, dependencyGraph, bioGridFlag):
    """
    Build the species pattern for a complex and store it in the translator.

    Each molecule name listed in ``dependencyGraph[element[0]][0]`` is added
    to a new ``st.Species``: names already present in ``translator`` are
    copied from there, unknown names get a fresh ``st.Molecule``.  The
    binding pairs returned by ``getComplexationComponents2`` are then wired
    together by giving each partner a component named after the other
    partner (lower-cased) and appending the pair index to that component's
    ``bonds`` list.

    Parameters:
        element: sequence whose first item is the name of the complex.
        translator: dict mapping names to ``st.Species``; updated in place
            (templates may gain components, and ``translator[element[0]]``
            is set to the new species).
        dependencyGraph: dict read for the composition of ``element[0]``;
            also written to for molecules that are not in ``translator``.
        bioGridFlag: forwarded unchanged to ``getComplexationComponents2``.

    Returns:
        None.  The result is stored in ``translator[element[0]]``.
    """
    species = st.Species()
    # Go over the species composition and reuse existing definitions
    for molecule in dependencyGraph[element[0]][0]:
        if molecule in translator:
            tmpSpecies = translator[molecule]
            trueTag = getTrueTag(dependencyGraph, molecule)
            if molecule != trueTag:
                original = translator[trueTag]
                updateSpecies(tmpSpecies, original.molecules[0])
            species.addMolecule(deepcopy(tmpSpecies.molecules[0]))
        else:
            mol = st.Molecule(molecule)
            dependencyGraph[molecule] = deepcopy(mol)
            species.addMolecule(mol)

    # How do things bind together?
    moleculePairsList = getComplexationComponents2(species, bioGridFlag)
    # TODO: update basic molecules with new components
    for idx, molecule in enumerate(moleculePairsList):
        first, second = molecule[0], molecule[1]

        # First partner: reuse a free binding site named after the second
        # partner if one exists; the ``else`` runs only when no site was
        # found (the loop finished without ``break``).
        siteName = second.name.lower()
        for component in first.components:
            if component.name == siteName and len(component.bonds) == 0:
                component.bonds.append(idx)
                break
        else:
            newComponent1 = st.Component(siteName)
            first.components.append(newComponent1)
            # Register the new site on the template molecule only once
            template = translator[first.name].molecules[0].components
            if newComponent1.name not in [x.name for x in template]:
                template.append(deepcopy(newComponent1))
            first.components[-1].bonds.append(idx)

        # Same thing for the other member of the bond
        siteName = first.name.lower()
        for component in second.components:
            if component.name == siteName and len(component.bonds) == 0:
                component.bonds.append(idx)
                break
        else:
            newComponent2 = st.Component(siteName)
            second.components.append(newComponent2)
            # For a homodimer the template was already handled above
            if first.name != second.name:
                # The duplicate check must look at the template that is
                # being appended to (the second partner's), otherwise the
                # same component is added again for every new bond.
                template = translator[second.name].molecules[0].components
                if newComponent2.name not in [x.name for x in template]:
                    template.append(deepcopy(newComponent2))
            second.components[-1].bonds.append(idx)

    # Update the translator
    translator[element[0]] = species
예제 #19
0
def pickle_training_set(run_name,
                        training_sets_folder="training_set",
                        pickle_file_name="training_set",
                        high_energy_cutoff=500.0,
                        system_x_offset=1000.0,
                        verbose=False,
                        extra_parameters=None):
    """
    A function to pickle together the training set in a manner that is
    readable for MCSMRFF.  This is a single LAMMPs data file with each
    training set offset along the x-axis by system_x_offset.  The pickle
    file, when read in later, holds a list of two objects.  The first is
    the entire system as described above.  The second is a dictionary of all
    molecules in the system, organized by composition.

    Besides the pickle file, an xyz file of every accepted subset is written
    next to it, and the system plus training data are written into a folder
    named run_name (created if missing), into which the pickle is also
    copied as "<run_name>.pickle".

    **Parameters**

        run_name: *str*
            Name of final training set.
        training_sets_folder: *str, optional*
            Path to the folder where all the training set data is.
        pickle_file_name: *str, optional*
            A name for the pickle file and training set system.
        high_energy_cutoff: *float, optional*
            A cutoff for systems that are too large in energy, as MD is likely
            never to sample them.  Compared against the energy relative to
            the lowest-energy subset of the same composition.
        system_x_offset: *float, optional*
            The x offset for the systems to be added by.
        verbose: *bool, optional*
            Whether to have additional stdout or not.
        extra_parameters: *dict, optional*
            A dictionary of additional parameters that do not exist
            in the default OPLSAA parameter file.  Defaults to an empty
            dictionary.

    **Returns**

        system: *System*
            The entire training set system.
        systems_by_composition: *dict, list, Molecule*
            Each molecule organized in this hash table.

    **Raises**

        Exception
            If the pickle file already exists, or if the atom positions in a
            subset's cml file and orca output differ by more than 1e-4.
    """
    # Use a fresh dict per call rather than one shared mutable default
    if extra_parameters is None:
        extra_parameters = {}

    # Take care of pickle file I/O
    if training_sets_folder.endswith("/"):
        training_sets_folder = training_sets_folder[:-1]
    if pickle_file_name is not None and pickle_file_name.endswith(".pickle"):
        pickle_file_name = pickle_file_name.split(".pickle")[0]
    pfile = training_sets_folder + "/" + pickle_file_name + ".pickle"
    sys_name = pickle_file_name
    if os.path.isfile(pfile):
        raise Exception("Pickled training set already exists!")

    # Generate empty system for your training set
    system = structures.System(box_size=[1e3, 100.0, 100.0], name=sys_name)
    systems_by_composition = {}

    # Convert force from Ha/Bohr to kcal/mol-Ang
    def convert(x):
        return units.convert_dist(
            "Ang", "Bohr", units.convert_energy("Ha", "kcal", x))

    # For each folder in the training_sets folder lets get the cml file we
    # want and write the energies and forces for that file
    for name in os.listdir(training_sets_folder):
        # We'll read in any training subset that succeeded and print a warning
        # on those that failed
        try:
            result = orca.read("%s/%s/%s.out" %
                               (training_sets_folder, name, name))
        except IOError:
            print(("Warning - Training Subset %s not included as "
                   "out file not found...") % name)
            continue

        # Check for convergence
        if not result.converged:
            print("Warning - Results for %s have not converged." % name)
            continue

        # Parse the force output and change units. In the case of no force
        # found, do not use this set of data
        try:
            forces = orca.engrad_read("%s/%s/%s.orca.engrad" %
                                      (training_sets_folder, name, name),
                                      pos="Ang")[0]
            for a, b in zip(result.atoms, forces):
                a.fx, a.fy, a.fz = convert(b.fx), convert(b.fy), convert(b.fz)
        except (IndexError, IOError):
            print(("Warning - Training Subset %s not included as "
                   "results not found...") % name)
            continue

        # Get the bonding information
        with_bonds = structures.Molecule("%s/%s/%s.cml" %
                                         (training_sets_folder, name, name),
                                         extra_parameters=extra_parameters,
                                         allow_errors=True,
                                         test_charges=False)

        # Copy over the forces read in into the system that has the bonding
        # information
        for a, b in zip(with_bonds.atoms, result.atoms):
            a.fx, a.fy, a.fz = b.fx, b.fy, b.fz
            # sanity check on atom positions
            if geometry.dist(a, b) > 1e-4:
                raise Exception('Atoms are different:', (a.x, a.y, a.z),
                                (b.x, b.y, b.z))

        # Rename and save energy
        with_bonds.energy = result.energy
        with_bonds.name = name

        # Group the subsets by their (sorted) list of elements
        composition = ' '.join(sorted([a.element for a in result.atoms]))
        systems_by_composition.setdefault(composition, []).append(with_bonds)

    # Generate:
    #  (1) xyz file of various systems as different time steps
    #  (2) system to simulate
    xyz_atoms = []
    to_delete = []
    for composition in systems_by_composition:
        subsets = systems_by_composition[composition]
        # Sort so that the lowest energy training subset is first
        # in the system
        subsets.sort(key=lambda s: s.energy)
        baseline_energy = subsets[0].energy
        # Offset the energies by the lowest energy, and convert energy units
        for j, s in enumerate(subsets):
            s.energy -= baseline_energy
            s.energy = units.convert_energy("Ha", "kcal/mol", s.energy)
            # Don't use high-energy systems, because these will not likely
            # be sampled in MD
            if s.energy > high_energy_cutoff:
                to_delete.append([composition, j])
                continue
            # For testing purposes, output
            if verbose:
                # Single-argument form prints the same on Python 2 and 3
                print("Using: %s %s" % (s.name, s.energy))
            xyz_atoms.append(s.atoms)
            system.add(s, len(system.molecules) * system_x_offset)

    # Delete the system_names that we aren't actually using due to energy
    # being too high.  Highest list index first, so that the remaining
    # indices of the same composition stay valid while deleting.
    to_delete = sorted(to_delete, key=lambda x: x[1])[::-1]
    for d1, d2 in to_delete:
        if verbose:
            print(("Warning - Training Subset %s not included as energy "
                   "is too high...") % systems_by_composition[d1][d2].name)
        del systems_by_composition[d1][d2]

    # Make the box just a little bigger (100) so that we can fit all our
    # systems
    system.xhi = len(system.molecules) * system_x_offset + 100.0

    # Write all of the states we are using to training_sets.xyz
    files.write_xyz(xyz_atoms, training_sets_folder + '/' + pickle_file_name)
    # Generate our pickle file
    print("Saving pickle file %s..." % pfile)
    with open(pfile, "wb") as fptr:
        pickle.dump([system, systems_by_composition], fptr)

    # Now we have the data, save it to files for this simulation of
    # "run_name" and return parameters
    if not os.path.isdir(run_name):
        os.mkdir(run_name)
    start_dir = os.getcwd()
    os.chdir(run_name)
    try:
        mcsmrff_files.write_system_and_training_data(run_name, system,
                                                     systems_by_composition)
    finally:
        # Always return to where we started, even if writing failed, so the
        # relative paths used below (and by the caller) stay valid
        os.chdir(start_dir)
    shutil.copyfile(pfile, "%s/%s.pickle" % (run_name, run_name))

    return system, systems_by_composition
예제 #20
0
def createEmptySpecies(name):
    """Return a new ``st.Species`` holding one ``st.Molecule`` built from *name*."""
    emptySpecies = st.Species()
    emptySpecies.addMolecule(st.Molecule(name))
    return emptySpecies
예제 #21
0
def generate_lead_halide_cation(halide, cation, ion="Pb", run_opt=True):
    """
    Build (or load from the cml folder) the ion-halide + cation molecule.

    If a cml file for this combination already exists in
    ``fpl_constants.cml_dir`` it is read and returned directly.  Otherwise
    the cation is read from its own cml file, oriented, placed above the
    structure returned by ``generate_lead_halide``, optionally relaxed with
    an Orca job, and written to the cml folder so it is not regenerated.

    **Parameters**

        halide:
            Forwarded to ``reduce_to_name`` and ``generate_lead_halide``.
        cation: *str*
            Name of the cation; "<cation>.cml" is read from the cml folder.
        ion: *str, optional*
            Element symbol of the central ion.
        run_opt: *bool, optional*
            Whether to run an Orca job on the assembled system and adopt
            the atom positions it reports.

    **Returns**

        system: *Molecule*
            The combined molecule.
    """
    cml_dir = fpl_constants.cml_dir
    # Name under which this ion/halide/cation combination is stored
    fname = reduce_to_name(ion, halide, cation)
    if not cml_dir.endswith("/"):
        cml_dir += "/"
    stored_cml = cml_dir + fname + ".cml"

    # Nothing to build if the system was generated before
    if os.path.exists(stored_cml):
        print("Found system in cml folder, returning system")
        stored = files.read_cml(stored_cml,
                                test_charges=False,
                                allow_errors=True)
        return structures.Molecule(stored[0])

    # Get the PbX3 system
    lead_halide = generate_lead_halide(halide, ion=ion)
    # Get the cation from the cml file
    atoms, bonds, _, _ = files.read_cml(cml_dir + cation + ".cml",
                                        test_charges=False,
                                        allow_errors=True)
    system = structures.Molecule(atoms)
    # Align along X axis
    system.atoms = geometry.align_centroid(system.atoms)[0]

    # Only three cations are expected: Cs, MA, FA.  Two nitrogens means FA,
    # which should stay flat; everything else is rotated onto the Z axis.
    elems = [atom.element for atom in system.atoms]
    if elems.count("N") != 2:
        system.rotate(geometry.rotation_matrix([0, 1, 0], 90, units="deg"))
    else:
        # NOTE(review): this translates by +centre-of-mass; confirm that
        # translate()'s sign convention really moves the molecule to the
        # origin as intended.
        system.translate(system.get_center_of_mass())

    # If N and C in system, ensure N is below C (closer to Pb)
    if "N" in elems and "C" in elems:
        nitrogens = [atom for atom in system.atoms if atom.element == "N"]
        carbons = [atom for atom in system.atoms if atom.element == "C"]
        if nitrogens[0].z > carbons[0].z:
            # Flip if needed
            system.rotate(
                geometry.rotation_matrix([0, 1, 0], 180, units="deg"))

    # Offset system so lowest point is at 0 in the z dir
    lowest_z = min(atom.z for atom in system.atoms)
    system.translate([0, 0, -lowest_z])

    # Lift by the ion's van der Waals radius, then add the PbX3 atoms
    system.translate([0, 0, PERIODIC_TABLE[units.elem_s2i(ion)]['vdw_r']])
    system.atoms += lead_halide.atoms

    # Run a geometry optimization of this system
    if run_opt:
        opt_job = orca.job(fname,
                           fpl_constants.default_routes[0],
                           atoms=system.atoms,
                           extra_section=fpl_constants.extra_section,
                           queue="batch",
                           procs=2)
        opt_job.wait()
        for atom, relaxed in zip(system.atoms, orca.read(fname).atoms):
            atom.x, atom.y, atom.z = relaxed.x, relaxed.y, relaxed.z

    # Set OPLS types for the ion and the halides
    typed_elements = (ion, "Cl", "Br", "I")
    for atom in system.atoms:
        if atom.element in typed_elements:
            atom.type = fpl_constants.atom_types[atom.element]
            atom.type_index = atom.type["index"]

    # Write cml file so we don't re-generate, and return system
    files.write_cml(system, bonds=bonds, name=stored_cml)
    return system
예제 #22
0
def job(fpl_obj, task_name):
    """
    Create the LAMMPS task for *fpl_obj* under the name *task_name*.

    A fixed LAMMPS input template is filled in through
    ``fpl_utils.input_variable``:

        $MOBILE$   - number of atoms in the solute (0 without a solute);
                     atoms with an id above it form the "mobile" group
        $RUN_NAME$ - task_name
        $SEED$     - fpl_obj.seed
        $RUN_LEN$  - fpl_obj.lmp_run_len
        $IMOBILE$  - commands freezing the "immobile" group when a solute
                     is given, otherwise empty

    ``fpl_obj.system.name`` is overwritten with *task_name*.  The returned
    task has its parameters set from *fpl_obj* and ``callback_grab_final``
    as its callback.
    """
    lammps_template = '''units real
atom_style full
pair_style lj/cut/coul/dsf 0.05 10.0 10.0
bond_style harmonic
angle_style harmonic
dihedral_style opls

boundary p p p
read_data $RUN_NAME$.data

dump 1 all xyz 100 $RUN_NAME$.xyz

fix av all ave/time 1 100 100 c_thermo_pe
thermo_style custom step f_av pe temp press
thermo 100

group mobile id > $MOBILE$
group immobile subtract all mobile

$IMOBILE$

velocity mobile create 100.0 $SEED$ rot yes dist gaussian
velocity immobile set 0.0 0.0 0.0

fix relax mobile nve/limit 0.1
run 10000
unfix relax

fix motion mobile nvt temp 100.0 100.0 100.0

timestep 1.0
run $RUN_LEN$

write_restart $RUN_NAME$.restart'''

    # Load the solute (if any) to learn how many atoms it has
    solute = None
    if fpl_obj.solute is not None:
        solute = structures.Molecule(fpl_constants.cml_dir + fpl_obj.solute,
                                     test_charges=False,
                                     allow_errors=True)

    if solute:
        solute_atom_count = len(solute.atoms)
    else:
        solute_atom_count = 0

    if solute is not None:
        freeze_commands = "velocity immobile zero linear\nfix freeze immobile setforce 0.0 0.0 0.0"
    else:
        freeze_commands = ""

    # Fill in the template, one placeholder at a time
    replacements = (
        ("$MOBILE$", str(solute_atom_count)),
        ("$RUN_NAME$", task_name),
        ("$SEED$", fpl_obj.seed),
        ("$RUN_LEN$", fpl_obj.lmp_run_len),
        ("$IMOBILE$", freeze_commands),
    )
    input_script = lammps_template
    for placeholder, value in replacements:
        input_script = fpl_utils.input_variable(placeholder, value,
                                                input_script)

    # NOTE! The data file is written by the system name, so the system is
    # renamed to the task name before the task is generated
    fpl_obj.system.name = task_name

    lammps_task = lammps_job.lmp_task(task_name,
                                      fpl_obj.system,
                                      queue=fpl_obj.queue,
                                      procs=fpl_obj.procs,
                                      priority=fpl_obj.priority,
                                      xhosts=fpl_obj.xhosts)

    task_parameters = dict(
        email=fpl_obj.email,
        pair_coeffs_included=fpl_obj.pair_coeffs_included,
        hybrid_pair=fpl_obj.hybrid_pair,
        hybrid_angle=fpl_obj.hybrid_angle,
        trj_file=fpl_obj.trj_file,
        xyz_file=fpl_obj.xyz_file,
        read_atoms=fpl_obj.read_atoms,
        read_timesteps=fpl_obj.read_timesteps,
        read_num_atoms=fpl_obj.read_num_atoms,
        read_box_bounds=fpl_obj.read_box_bounds)
    lammps_task.set_parameters(input_script, **task_parameters)

    lammps_task.callback = callback_grab_final

    return lammps_task