Beispiel #1
0
 def getenv(self, sp, i):
     """Return environment number i of species sp.

     When no environment is stored for sp at index i, a new
     environ(self.nmax, self.lmax, self.alchem, sp) is built and returned
     instead (missing atoms are treated as isolated species).
     """
     # guard clause: nothing stored for this species / index
     if sp not in self.env or i >= len(self.env[sp]):
         return environ(self.nmax, self.lmax, self.alchem, sp)
     return self.env[sp][i]
Beispiel #2
0
    def parse(self,
              fat,
              coff=5.0,
              cotw=0.5,
              nmax=4,
              lmax=3,
              gs=0.5,
              cw=1.0,
              nocenter=[],
              noatom=[],
              unsoap=False,
              kit=None,
              soapdump=None):
        """Takes a frame in the QUIPPY format and computes a list of its environments.

        Results are stored on the instance: ``species`` (count per atomic
        number), ``zspecies`` (sorted atomic numbers), ``env`` (list of
        environments per species), ``nenv`` (total number of environments)
        and ``globenv`` (sum of all environments divided by ``nenv``).

        Args:
            fat: frame to analyse; it is copied before anything is changed.
            coff: cutoff given to ``set_cutoff`` and to the descriptor string.
            cotw: value of ``cutoff_transition_width`` in the descriptor string.
            nmax: value of ``n_max``; also stored as ``self.nmax``.
            lmax: value of ``l_max``; also stored as ``self.lmax``.
            gs: value of ``atom_sigma`` in the descriptor string.
            cw: value of ``central_weight`` in the descriptor string.
            nocenter: atomic numbers that are skipped as environment centres;
                their entry in ``self.species`` is reset to 0. Only read here.
            noatom: atomic numbers whose atoms are removed from the copied
                frame. Only read here.
            unsoap: when true, ``normalise=F`` is added to the descriptor
                string; the flag is also forwarded to ``environ.convert``.
            kit: optional mapping species -> number of environments; species
                with fewer environments are padded with
                ``environ(nmax, lmax, alchem, sp)`` and ``self.species[sp]``
                is set to ``kit[sp]``.
            soapdump: optional writable file object; when given, descriptor
                rows are written to it with ``np.savetxt`` and the descriptor
                string is echoed to stderr.

        Raises:
            TypeError: re-raised when the descriptor call fails with a
                TypeError, after printing a hint about the QUIP/GAP interface.
        """

        # removes atoms that are to be ignored
        at = fat.copy()
        # NOTE(review): indices run from 1 to size inclusive, presumably
        # because the frame arrays are 1-based -- confirm against quippy.
        nol = [s for s in range(1, at.z.size + 1) if at.z[s] in noatom]
        if nol:
            at.remove_atoms(nol)

        self.nmax = nmax
        self.lmax = lmax
        self.atz = at.z.copy()
        self.species = {}
        for z in at.z:
            self.species[z] = self.species.get(z, 0) + 1

        # sorted() works on both Python 2 lists and Python 3 dict views
        self.zspecies = sorted(self.species)
        lspecies = ('n_species=' + str(len(self.zspecies)) + ' species_Z={ ' +
                    ''.join(str(z) + ' ' for z in self.zspecies) + '}')

        at.set_cutoff(coff)
        at.calc_connect()

        self.nenv = 0
        if soapdump is not None:
            soapdump.write("####### SOAP VECTOR FRAME ######\n")
        for sp in self.species:

            if sp in nocenter:
                self.species[sp] = 0
                continue  # Option to skip some environments

            # first computes the descriptors of species that are present;
            # the string is built once so that the logged text is exactly
            # what is handed to the descriptor (including normalise=F)
            soapstr = ("soap central_reference_all_species=F " +
                       ("normalise=F" if unsoap else "") + " central_weight=" +
                       str(cw) + "  covariance_sigma0=0.0 atom_sigma=" +
                       str(gs) + " cutoff=" + str(coff) +
                       " cutoff_transition_width=" + str(cotw) + " n_max=" +
                       str(nmax) + " l_max=" + str(lmax) + ' ' + lspecies +
                       ' Z=' + str(sp))
            if soapdump is not None:
                sys.stderr.write("SOAP STRING:    " + soapstr + "\n")
            desc = quippy.descriptors.Descriptor(soapstr)
            try:
                psp = desc.calc(at)["descriptor"]
            except TypeError:
                print("Interface change in QUIP/GAP. Update your code first.")
                # do not continue: psp would be undefined, or worse, still
                # hold the descriptors of the previously processed species
                raise

            if soapdump is not None:
                soapdump.write("Specie %d - %d atoms\n" % (sp, len(psp)))
                for p in psp:
                    np.savetxt(soapdump, [p])

            # now repartitions soaps in environment descriptors
            lenv = []
            for p in psp:
                center_env = environ(nmax, lmax, self.alchem)
                center_env.convert(sp, self.zspecies, p, unsoap)
                lenv.append(center_env)
            self.env[sp] = lenv
            self.nenv += self.species[sp]

        # adds kit data
        if kit is None:
            kit = {}

        for sp in kit:
            if sp not in self.species:
                self.species[sp] = 0
                self.env[sp] = []
            for _ in range(self.species[sp], kit[sp]):
                self.env[sp].append(
                    environ(self.nmax, self.lmax, self.alchem, sp))
                self.nenv += 1
            self.species[sp] = kit[sp]

        self.zspecies = sorted(self.species)

        # also compute the global (flattened) fingerprint
        self.globenv = environ(nmax, lmax, self.alchem)

        for se in self.env.values():
            for e in se:
                self.globenv.add(e)
        # divides by the number of atoms in the structure
        for sij in self.globenv.soaps:
            self.globenv.soaps[sij] *= 1.0 / self.nenv
Beispiel #3
0
def structk(strucA,
            strucB,
            alchem=alchemy(),
            peratom=False,
            mode="match",
            fout=None,
            peps=0.0,
            gamma=1.0,
            zeta=1.0,
            xspecies=False):
    """Compute the SOAP similarity kernel between two structures.

    The structure kernel is obtained by combining atom-centered environment
    kernels ``envk(envA, envB, alchem)**zeta``. Supported modes:

      fastavg:     kernel between the two precomputed ``globenv`` fingerprints
      fastspecies: per-species summed environments, kernels added over the
                   species present in both structures and divided by
                   ``strucA.nenv * strucB.nenv``
      match:       best-match kernel, ``1 - best_cost(1 - kk) / nenv``
      permanent:   ``xperm(kk)`` (or ``mcperm(kk, peps)`` when ``peps > 0``)
                   divided by ``nenv! * nenv``
      rematch:     ``rematch(kk, gamma, 1e-6)``
      average:     mean of all entries of the environment kernel matrix

    Args:
        strucA, strucB: structures providing ``zspecies``, ``nenv``,
            ``globenv``, ``getnz``, ``getenv`` and ``ismissing``.
        alchem: alchemy object handed to ``envk``; its ``mu`` attribute
            controls the missing-atom penalty.
        peratom: when true the environment lists of the two structures are
            used as they are; otherwise both are topped up, species by
            species, to the larger of the two counts.
        mode: one of the modes listed above.
        fout: optional writable file object receiving the species rows and
            the environment kernel matrix.
        peps: when > 0, ``mcperm`` is used instead of ``xperm``.
        gamma: regularisation parameter passed to ``rematch``.
        zeta: exponent applied to every environment kernel.
        xspecies: when true, entries between different species are set to 0.

    Returns:
        ``(cost, kk)`` where ``kk`` is the environment kernel matrix; the two
        fast modes return ``(value, 0)``.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
    """
    # np.math was only an alias of the stdlib module and is gone in NumPy 2
    import math

    # average kernel. quick & easy!
    if mode == "fastavg":
        return envk(strucA.globenv, strucB.globenv, alchem)**zeta, 0

    if mode == "fastspecies":
        # for now, only implement standard Kronecker alchemy
        kk = 0
        for za in strucA.zspecies:
            if za not in strucB.zspecies:
                continue
            senvA = environ(strucA.nmax, strucA.lmax, strucA.alchem)
            for ia in range(strucA.getnz(za)):
                senvA.add(strucA.getenv(za, ia))
            senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem)
            for ib in range(strucB.getnz(za)):
                senvB.add(strucB.getenv(za, ib))
            kk += envk(senvA, senvB, alchem)**zeta

        kk /= strucA.nenv * strucB.nenv
        return kk, 0

    if peratom:
        # we do not check for compatibility at this stage, just assume that
        # the matching will be done somehow (otherwise it would be exceedingly
        # hard to manage in case of non-standard alchemy)
        nspeciesA = [(z, strucA.getnz(z)) for z in strucA.zspecies]
        nspeciesB = [(z, strucB.getnz(z)) for z in strucB.zspecies]
        nenv = nenvA = strucA.nenv
        nenvB = strucB.nenv
    else:
        # top up missing atoms with isolated environments
        # first checks which atoms are present (set union works for any
        # iterable of species, not only for lists)
        zspecies = sorted(set(strucB.zspecies) | set(strucA.zspecies))
        nspecies = [(z, max(strucA.getnz(z), strucB.getnz(z)))
                    for z in zspecies]
        nenv = sum(nz for _, nz in nspecies)
        nenvA = nenvB = nenv
        nspeciesA = nspeciesB = nspecies

    np.set_printoptions(linewidth=500, precision=4)

    # flat (species, index) lists; position in the list == row/column of kk
    atomsA = [(za, ia) for za, nza in nspeciesA for ia in range(nza)]
    atomsB = [(zb, ib) for zb, nzb in nspeciesB for ib in range(nzb)]

    kk = np.zeros((nenvA, nenvB), float)
    for ika, (za, ia) in enumerate(atomsA):
        envA = strucA.getenv(za, ia)
        for ikb, (zb, ib) in enumerate(atomsB):
            envB = strucB.getenv(zb, ib)
            if alchem.mu > 0 and (strucA.ismissing(za, ia)
                                  ^ strucB.ismissing(zb, ib)):
                # includes a penalty dependent on "mu", in a way that is
                # consistent with the definition of kernel distance
                kk[ika, ikb] = exp(-alchem.mu)
            elif za == zb or not xspecies:
                kk[ika, ikb] = envk(envA, envB, alchem)**zeta
            else:
                # xspecies: kernels between different species are zeroed
                kk[ika, ikb] = 0

    if fout is not None:
        # prints out similarity information for the environment pairs
        fout.write(
            "# atomic species in the molecules (possibly topped up with dummy isolated atoms): \n"
        )
        fout.write("".join(" %d " % (za) for za, _ in atomsA) + "\n")
        fout.write("".join(" %d " % (zb) for zb, _ in atomsB) + "\n")

        fout.write("# environment kernel matrix: \n")
        for r in kk:
            fout.write("".join("%20.14e " % (e) for e in r) + "\n")

    # Now we have the matrix of scalar products.
    # We can first find the optimal scalar product kernel
    # we must find the maximum "cost"
    if mode == "match":
        if peratom and nenvA != nenvB:
            nenv = lcm(nenvA, nenvB)
            hun = lcm_best_cost(1 - kk)
        else:
            hun = best_cost(1.0 - kk)
        cost = 1 - hun / nenv
    elif mode == "permanent":
        # there is no place to hide: cross-species environments are not
        # necessarily zero
        cost = mcperm(kk, peps) if peps > 0 else xperm(kk)
        cost = cost / math.factorial(nenv) / nenv
    elif mode == "rematch":
        # hard-coded residual error for regularized gamma
        cost = rematch(kk, gamma, 1e-6)
    elif mode == "average":
        cost = kk.sum() / (nenvA * nenvB)
    else:
        raise ValueError("Unknown global fingerprint mode %s" % (mode, ))

    return cost, kk
Beispiel #4
0
   def parse(self, fat, coff=5.0, cotw=0.5, nmax=4, lmax=3, gs=0.5, cw=1.0, nocenter=[], noatom=[], kit=None, soapdump=None):
      """Takes a frame in the QUIPPY format and computes a list of its environments.

      Results are stored on the instance: ``species`` (count per atomic
      number), ``zspecies`` (sorted atomic numbers), ``env`` (list of
      environments per species), ``nenv`` (total number of environments)
      and ``globenv`` (sum of all environments divided by ``nenv``).

      Args:
         fat: frame to analyse; it is copied before anything is changed.
         coff: cutoff given to ``set_cutoff`` and to the descriptor string.
         cotw: value of ``cutoff_transition_width`` in the descriptor string.
         nmax: value of ``n_max``; also stored as ``self.nmax``.
         lmax: value of ``l_max``; also stored as ``self.lmax``.
         gs: value of ``atom_sigma`` in the descriptor string.
         cw: value of ``central_weight`` in the descriptor string.
         nocenter: atomic numbers that are skipped as environment centres;
            their entry in ``self.species`` is reset to 0. Only read here.
         noatom: atomic numbers whose atoms are removed from the copied
            frame. Only read here.
         kit: optional mapping species -> number of environments; species
            with fewer environments are padded with
            ``environ(nmax, lmax, alchem, sp)`` and ``self.species[sp]`` is
            set to ``kit[sp]``.
         soapdump: optional writable file object; when given, descriptor
            rows are written to it with ``np.savetxt`` and the descriptor
            string is echoed to stderr.

      Raises:
         TypeError: re-raised when the descriptor call fails with a
            TypeError, after printing a hint about the QUIP/GAP interface.
      """

      # removes atoms that are to be ignored
      at = fat.copy()
      # NOTE(review): indices run from 1 to size inclusive, presumably
      # because the frame arrays are 1-based -- confirm against quippy.
      nol = [s for s in range(1, at.z.size + 1) if at.z[s] in noatom]
      if nol:
         at.remove_atoms(nol)

      self.nmax = nmax
      self.lmax = lmax
      self.atz = at.z.copy()
      self.species = {}
      for z in at.z:
         self.species[z] = self.species.get(z, 0) + 1

      # sorted() works on both Python 2 lists and Python 3 dict views
      self.zspecies = sorted(self.species)
      lspecies = ('n_species=' + str(len(self.zspecies)) + ' species_Z={ ' +
                  ''.join(str(z) + ' ' for z in self.zspecies) + '}')

      at.set_cutoff(coff)
      at.calc_connect()

      self.nenv = 0
      if soapdump is not None:
         soapdump.write("####### SOAP VECTOR FRAME ######\n")
      for sp in self.species:

         if sp in nocenter:
            self.species[sp] = 0
            continue  # Option to skip some environments

         # first computes the descriptors of species that are present;
         # the string is built once and shared by the log and the descriptor
         soapstr = ("soap central_reference_all_species=F central_weight=" + str(cw) +
                    "  covariance_sigma0=0.0 atom_sigma=" + str(gs) +
                    " cutoff=" + str(coff) +
                    " cutoff_transition_width=" + str(cotw) +
                    " n_max=" + str(nmax) + " l_max=" + str(lmax) +
                    ' ' + lspecies + ' Z=' + str(sp))
         if soapdump is not None:
            sys.stderr.write("SOAP STRING:    " + soapstr + "\n")
         desc = quippy.descriptors.Descriptor(soapstr)
         try:
            psp = desc.calc(at)["descriptor"].T
         except TypeError:
            print("Interface change in QUIP/GAP. Update your code first.")
            # do not continue: psp would be undefined, or worse, still
            # hold the descriptors of the previously processed species
            raise

         if soapdump is not None:
            soapdump.write("Specie %d - %d atoms\n" % (sp, len(psp)))
            for p in psp:
               np.savetxt(soapdump, [p])

         # now repartitions soaps in environment descriptors
         lenv = []
         for p in psp:
            center_env = environ(nmax, lmax, self.alchem)
            center_env.convert(sp, self.zspecies, p)
            lenv.append(center_env)
         self.env[sp] = lenv
         self.nenv += self.species[sp]

      # adds kit data
      if kit is None:
         kit = {}

      for sp in kit:
         if sp not in self.species:
            self.species[sp] = 0
            self.env[sp] = []
         for _ in range(self.species[sp], kit[sp]):
            self.env[sp].append(environ(self.nmax, self.lmax, self.alchem, sp))
            self.nenv += 1
         self.species[sp] = kit[sp]

      self.zspecies = sorted(self.species)

      # also compute the global (flattened) fingerprint
      self.globenv = environ(nmax, lmax, self.alchem)

      for se in self.env.values():
         for e in se:
            self.globenv.add(e)
      # divides by the number of atoms in the structure
      for sij in self.globenv.soaps:
         self.globenv.soaps[sij] *= 1.0 / self.nenv
Beispiel #5
0
 def getenv(self, sp, i):
    """Return environment number i of species sp.

    When no environment is stored for sp at index i, a new
    environ(self.nmax, self.lmax, self.alchem, sp) is built and returned
    instead (missing atoms are treated as isolated species).
    """
    # guard clause: nothing stored for this species / index
    if sp not in self.env or i >= len(self.env[sp]):
       return environ(self.nmax, self.lmax, self.alchem, sp)
    return self.env[sp][i]
Beispiel #6
0
def structk(strucA, strucB, alchem=alchemy(), periodic=False, mode="match", fout=None, peps=0.0, gamma=1.0, zeta=1.0, xspecies=False):
    """Compute the SOAP similarity kernel between two structures.

    The structure kernel is obtained by combining atom-centered environment
    kernels ``envk(envA, envB, alchem)**zeta``. Supported modes:

      fastavg:     kernel between the two precomputed ``globenv`` fingerprints
      fastspecies: per-species summed environments, kernels added over the
                   species present in both structures and divided by
                   ``strucA.nenv * strucB.nenv``
      match:       best-match kernel, ``1 - best_cost(1 - kk) / nenv``
      permanent:   ``xperm(kk)`` (or ``mcperm(kk, peps)`` when ``peps > 0``)
                   divided by ``nenv! * nenv``
      rematch:     ``rematch(kk, gamma, 1e-6)``
      average:     mean of all entries of the environment kernel matrix

    Args:
        strucA, strucB: structures providing ``zspecies``, ``nenv``,
            ``globenv``, ``getnz``, ``getenv`` and ``ismissing``.
        alchem: alchemy object handed to ``envk``; its ``mu`` attribute
            controls the missing-atom penalty.
        periodic: when true the environment lists of the two structures are
            used as they are (structures of different periodicity);
            otherwise both are topped up, species by species, to the larger
            of the two counts.
        mode: one of the modes listed above.
        fout: optional writable file object receiving the species rows and
            the environment kernel matrix.
        peps: when > 0, ``mcperm`` is used instead of ``xperm``.
        gamma: regularisation parameter passed to ``rematch``.
        zeta: exponent applied to every environment kernel.
        xspecies: when true, entries between different species are set to 0.

    Returns:
        ``(cost, kk)`` where ``kk`` is the environment kernel matrix; the two
        fast modes return ``(value, 0)``.

    Raises:
        ValueError: if ``mode`` is not one of the supported modes.
    """
    # np.math was only an alias of the stdlib module and is gone in NumPy 2
    import math

    # average kernel. quick & easy!
    if mode == "fastavg":
        return envk(strucA.globenv, strucB.globenv, alchem)**zeta, 0

    if mode == "fastspecies":
        # for now, only implement standard Kronecker alchemy
        kk = 0
        for za in strucA.zspecies:
            if za not in strucB.zspecies:
                continue
            senvA = environ(strucA.nmax, strucA.lmax, strucA.alchem)
            for ia in range(strucA.getnz(za)):
                senvA.add(strucA.getenv(za, ia))
            senvB = environ(strucB.nmax, strucB.lmax, strucB.alchem)
            for ib in range(strucB.getnz(za)):
                senvB.add(strucB.getenv(za, ib))
            kk += envk(senvA, senvB, alchem)**zeta

        kk /= strucA.nenv * strucB.nenv
        return kk, 0

    if periodic:
        # we do not check for compatibility at this stage, just assume that
        # the matching will be done somehow (otherwise it would be exceedingly
        # hard to manage in case of non-standard alchemy)
        nspeciesA = [(z, strucA.getnz(z)) for z in strucA.zspecies]
        nspeciesB = [(z, strucB.getnz(z)) for z in strucB.zspecies]
        nenv = nenvA = strucA.nenv
        nenvB = strucB.nenv
    else:
        # top up missing atoms with isolated environments
        # first checks which atoms are present (set union works for any
        # iterable of species, not only for lists)
        zspecies = sorted(set(strucB.zspecies) | set(strucA.zspecies))
        nspecies = [(z, max(strucA.getnz(z), strucB.getnz(z)))
                    for z in zspecies]
        nenv = sum(nz for _, nz in nspecies)
        nenvA = nenvB = nenv
        nspeciesA = nspeciesB = nspecies

    np.set_printoptions(linewidth=500, precision=4)

    # flat (species, index) lists; position in the list == row/column of kk
    atomsA = [(za, ia) for za, nza in nspeciesA for ia in range(nza)]
    atomsB = [(zb, ib) for zb, nzb in nspeciesB for ib in range(nzb)]

    kk = np.zeros((nenvA, nenvB), float)
    for ika, (za, ia) in enumerate(atomsA):
        envA = strucA.getenv(za, ia)
        for ikb, (zb, ib) in enumerate(atomsB):
            envB = strucB.getenv(zb, ib)
            if alchem.mu > 0 and (strucA.ismissing(za, ia) ^ strucB.ismissing(zb, ib)):
                # includes a penalty dependent on "mu", in a way that is
                # consistent with the definition of kernel distance
                kk[ika, ikb] = exp(-alchem.mu)
            elif za == zb or not xspecies:
                kk[ika, ikb] = envk(envA, envB, alchem)**zeta
            else:
                # xspecies: kernels between different species are zeroed
                kk[ika, ikb] = 0

    if fout is not None:
        # prints out similarity information for the environment pairs
        fout.write("# atomic species in the molecules (possibly topped up with dummy isolated atoms): \n")
        fout.write("".join(" %d " % (za) for za, _ in atomsA) + "\n")
        fout.write("".join(" %d " % (zb) for zb, _ in atomsB) + "\n")

        fout.write("# environment kernel matrix: \n")
        for r in kk:
            fout.write("".join("%20.14e " % (e) for e in r) + "\n")

    # Now we have the matrix of scalar products.
    # We can first find the optimal scalar product kernel
    # we must find the maximum "cost"
    if mode == "match":
        if periodic and nenvA != nenvB:
            nenv = lcm(nenvA, nenvB)
            hun = lcm_best_cost(1 - kk)
        else:
            hun = best_cost(1.0 - kk)
        cost = 1 - hun / nenv
    elif mode == "permanent":
        # there is no place to hide: cross-species environments are not
        # necessarily zero
        cost = mcperm(kk, peps) if peps > 0 else xperm(kk)
        cost = cost / math.factorial(nenv) / nenv
    elif mode == "rematch":
        # hard-coded residual error for regularized gamma
        cost = rematch(kk, gamma, 1e-6)
    elif mode == "average":
        cost = kk.sum() / (nenvA * nenvB)
    else:
        raise ValueError("Unknown global fingerprint mode %s" % (mode, ))

    return cost, kk
Beispiel #7
0
    def parse(self,
              fat,
              coff=5.0,
              cotw=0.5,
              nmax=4,
              lmax=3,
              gs=0.5,
              cw=1.0,
              nocenter=[],
              noatom=[],
              kit=None):
        """Build the environment lists of a QUIPPY-format frame.

        The frame is copied, atoms whose atomic number is in ``noatom`` are
        removed, and one SOAP descriptor per species (except those listed in
        ``nocenter``) is evaluated. Each descriptor row is converted into an
        ``environ`` and stored in ``self.env``. ``kit`` optionally maps a
        species to the number of environments it should have; the difference
        is filled with ``environ(nmax, lmax, alchem, sp)`` objects. Finally
        ``self.globenv`` is set to the sum of all environments scaled by
        ``1 / self.nenv``.
        """

        # work on a copy and drop the atoms that must be ignored
        frame = fat.copy()
        dropped = [idx for idx in range(1, frame.z.size + 1)
                   if frame.z[idx] in noatom]
        if dropped:
            frame.remove_atoms(dropped)

        self.nmax = nmax
        self.lmax = lmax

        # number of atoms per atomic number
        self.species = {}
        for znum in frame.z:
            self.species[znum] = self.species.get(znum, 0) + 1

        self.zspecies = sorted(self.species)
        species_tag = ''.join(
            ['n_species=', str(len(self.zspecies)), ' species_Z={ '] +
            [str(znum) + ' ' for znum in self.zspecies] + ['}'])

        frame.set_cutoff(coff)
        frame.calc_connect()

        # part of the descriptor string that is the same for every species
        soap_head = ''.join([
            "soap central_weight=", str(cw),
            "  covariance_sigma0=0.0 atom_sigma=", str(gs),
            " cutoff=", str(coff),
            " cutoff_transition_width=", str(cotw),
            " n_max=", str(nmax),
            " l_max=", str(lmax),
            ' ', species_tag, ' Z=',
        ])

        self.nenv = 0
        for sp in self.species:
            if sp in nocenter:
                # Option to skip some environments
                self.species[sp] = 0
                continue

            # descriptor of the species that is present
            desc = quippy.descriptors.Descriptor(soap_head + str(sp))
            try:
                psp = np.asarray(
                    desc.calc(frame, desc.dimensions(), self.species[sp])).T
            except TypeError:
                # alternative call signature: fill a preallocated array
                psp = quippy.fzeros(
                    (desc.dimensions(), desc.descriptor_sizes(frame)[0]))
                desc.calc(frame, descriptor_out=psp)
                psp = np.array(psp.T)

            # one environ per descriptor row
            converted = []
            for row in psp:
                center_env = environ(nmax, lmax, self.alchem)
                center_env.convert(sp, self.zspecies, row)
                converted.append(center_env)
            self.env[sp] = converted
            self.nenv += self.species[sp]

        # kit data: pad every listed species up to the requested count
        if kit is None:
            kit = {}

        for sp in kit:
            if sp not in self.species:
                self.species[sp] = 0
                self.env[sp] = []
            for _ in range(self.species[sp], kit[sp]):
                self.env[sp].append(
                    environ(self.nmax, self.lmax, self.alchem, sp))
                self.nenv += 1
            self.species[sp] = kit[sp]

        self.zspecies = sorted(self.species)

        # global (flattened) fingerprint: sum of every stored environment ...
        self.globenv = environ(nmax, lmax, self.alchem)
        for envs in self.env.values():
            for one_env in envs:
                self.globenv.add(one_env)
        # ... divided by the number of atoms in the structure
        for key in self.globenv.soaps:
            self.globenv.soaps[key] *= 1.0 / self.nenv