Exemple #1
0
def read_cif(filePath: str):
    """
    Read CIF file with Pymatgen CifParser.

    Only the first data block / first structure of the file is used. All
    partial occupancies are reset to unity, sites closer than 0.1 Å are
    merged, the structure is sorted, and deuterium is converted to hydrogen
    before the TopoCIF data are extracted.

    Args
    ----
    filePath    :   path to input structure.

    Returns
    -------
    (s, info, bonds) :   ``s`` and ``bonds`` as returned by ``extract_topo``,
                         ``info`` as returned by ``fetch_info``.
    """

    # Parse the input file using Pymatgen.
    p = CifParser(Path(filePath), occupancy_tolerance=100, site_tolerance=0)

    # Raw CIF tags of the first data block. Serialise the parser once and
    # reuse the result for both the "info" and the TopoCIF extraction.
    cif_dict = list(p.as_dict().values())[0]

    # Get extra info from CIF.
    info = fetch_info(cif_dict)

    # Create Pymatgen "Structure" object. At the moment, only supporting one
    # file at a time, and therefore this should just receive the one and only
    # input structure.
    #TODO: adapt all reads to cycle through all structures in catenated CIF.
    s = p.get_structures(primitive=False)[0]

    # Iterate through all atoms, a, in the structure and set occupancy to 1.
    for i, a in enumerate(s):

        d = a.species.as_dict()
        e = list(d.keys())[0]

        # Test if occupancy is not unity.
        if d[e] != 1:
            s.replace(i, Element(e))

    # Remove potential "duplicate" atoms found within 0.1 Å of one-another.
    s.merge_sites(tol=0.1, mode="delete")

    # Then get a sorted structure (by electronegativity).
    s = s.get_sorted_structure()

    # Occasionally deuterated structures are given, however some routines
    # require protons only. Automatically convert D --> H.
    # The positions are taken from enumerate() because Structure.replace()
    # expects the position of the site within the structure; the sites do not
    # carry an "index" property until it is assigned below.
    ds = [i for i, a in enumerate(s) if a.specie.symbol == "D"]

    for d in ds:
        s.replace(d, Element("H"), properties={"index": d})

    # Consider parsing TopoCIF data. This mainly functions well for structures
    # already processed by this code, thereby achieving internal consistency.

    # Require all CIF "tags" to be present in file.
    s, bonds = extract_topo(s, cif_dict)

    return s, info, bonds
Exemple #2
0
    def __init__(self, cif_file, primitive=False, debug=False, verbose=False):
        """
        Parse a CIF file and cache its lattice, symmetry operations and sites.

        :param cif_file: string, path to cif file
        :param primitive: bool, if primitive unit cell should be used
        :param debug: stored as ``self.debug``
        :param verbose: stored as ``self.verbose``
        """

        # housekeeping
        self.debug = debug
        self.verbose = verbose
        # dtypes for numpy structured arrays
        self.sites_dtype = [('atom_type', '|U16'), ('atomic_number', 'i'), ('frac_x', 'f'), ('frac_y', 'f'),
                            ('frac_z', 'f'), ('occ', 'f'), ('DW', 'f')]
        self.supercell_sites_dtype = [('atom_type', '|U16'), ('atomic_number', 'i'), ('x', 'f'), ('y', 'f'), ('z', 'f'),
                                      ('occ', 'f'), ('DW', 'f')]
        self.xyz_dtype = [('atom_type', '|U16'), ('x', 'f'), ('y', 'f'), ('z', 'f')]
        self.XYZ_dtype = [('atomic_number', 'i'), ('x', 'f'), ('y', 'f'), ('z', 'f'), ('occ', 'f'), ('DW', 'f')]
        # initializing matrices
        self.p_mat = np.identity(3)
        self.P_mat = np.identity(4)
        self.Q_mat = np.identity(4)

        # parse cif
        cifparser = CifParser(cif_file)

        # get lattice and symmetry operations matrices
        self.structure = cifparser.get_structures(primitive=primitive)[0]
        lattice = self.structure.lattice
        # lattice vectors stored as the columns of ``self.basis``
        self.basis = np.column_stack([lattice.matrix[0], lattice.matrix[1], lattice.matrix[2]])
        self.lattice_const = np.array([lattice.a, lattice.b, lattice.c])
        self.lattice_angles = np.array([lattice.alpha, lattice.beta, lattice.gamma])
        ops = cifparser.symmetry_operations
        self.sym_ops = [op.affine_matrix for op in ops]

        # Get sites (raw CIF tags of the first data block)
        dic = cifparser.as_dict()
        data = list(dic.values())[0]
        # ``np.float`` was removed from NumPy; the builtin ``float`` is the
        # same dtype (float64).
        frac_x = np.array(data['_atom_site_fract_x'], dtype=float)
        frac_y = np.array(data['_atom_site_fract_y'], dtype=float)
        frac_z = np.array(data['_atom_site_fract_z'], dtype=float)
        frac_coord = np.column_stack((frac_x, frac_y, frac_z))
        site_labels = np.array(data['_atom_site_label'])
        site_symbols = np.array(data['_atom_site_type_symbol'])
        site_occps = np.array(data['_atom_site_occupancy'], dtype=float)

        # Debye-Waller factors: prefer B_iso (converted with B = 8*pi^2*U),
        # then U_iso, and fall back to a constant 0.07 when neither column is
        # present or parseable as plain floats.
        dw_sources = (('_atom_site_B_iso_or_equiv', 1.0 / (8 * np.pi ** 2)),
                      ('_atom_site_U_iso_or_equiv', 1.0))
        for tag, scale in dw_sources:
            try:
                DWs = np.array(data[tag], dtype=float) * scale
            except (KeyError, TypeError, ValueError):
                continue
            break
        else:
            DWs = np.full(site_labels.size, 0.07)

        self.sites = np.array([(atom_type, Element(atom_type).Z, x, y, z, occ, dw) for atom_type, (x, y, z), occ, dw in
                               zip(site_symbols, frac_coord, site_occps, DWs)], dtype=self.sites_dtype)
        self.print_verbose(' Lattice vectors:\n', np.round(self.basis, 4))
        self.print_verbose(' Lattice Constants (Å): \n', np.round(self.lattice_const, 4))
        self.print_verbose(' Lattice Angles (deg.): \n', np.round(self.lattice_angles, 4))
        self.print_verbose(' Volume (Å**3): \n', np.round(self.structure.volume, 4))
        self.print_verbose(' Chemical Formula: \n', self.structure.formula)
Exemple #3
0
def cif2gulp(fname):
    """
    Write a GULP input file next to the CIF file ``fname``.

    The input is assembled from a fixed set of keyword lines, a title block,
    the structure lines produced by ``GulpIO.cif_structure_lines`` for the
    first data block of the CIF, a fixed species block and a footer that
    references the ``uff4mof.lib`` library. The output path is ``fname`` with
    its extension replaced by ``.in``.

    Args:
        fname: path to a ``.cif`` file.
    """
    import os

    assert '.cif' in fname
    # Strip only the final extension so that dots elsewhere in the path
    # (e.g. "./x.cif" or "v1.2/x.cif") do not truncate the system name.
    system = os.path.splitext(fname)[0]

    cif = CifParser(fname)
    # Raw CIF tags of the first data block.
    cifd = list(cif.as_dict().values())[0]

    go = GulpIO()
    keyword_lines = [
        go.keyword_line("opti conp orthorhombic  noauto\n"),
        go.keyword_line("pressure 0 GPa"),
        go.keyword_line("ftol 0.0001"),
        go.keyword_line("gtol 0.001"),
        go.keyword_line("xtol 0.0001"),
        go.keyword_line("maxcyc 10000\n"),
    ]
    # Plain concatenation, as before, but without going through eval().
    header = ''.join(keyword_lines)

    title_block = (
        "title\n"
        f"GULP calculation for {system}\n"
        "end\n"
        "\n"
    )

    structure_block = go.cif_structure_lines(cifd) + '\n'

    species_block = (
        "Species\n"
        "C core C_R\n"
        "Cu core Cu4+2\n"
        "H core H_\n"
        "N core N_3\n"
        "O core O_2\n"
        "Br core Br \n"
        "F core F_\n"
        "S core S_R\n"
        "\n"
    )

    footer = (
        "library uff4mof.lib\n"
        "\n"
        f"dump {system}.grs\n"
        f"output cif {system}-relax\n"
    )

    ret = header + title_block + structure_block + species_block + footer
    with open(system + '.in', 'w') as f:
        f.write(ret)
Exemple #4
0
class CalcAnalyzer(object):

    def __init__(self, vaspdir='vasp_run', prim_file='prim.cif', calc_data_file='calcdata.mson', ce_file='ce.mson',
                 ce_radius=None, max_de=100, max_ew=3, sm_type='pmg_sm', ltol=0.2, stol=0.15, angle_tol=5,
                 solver='cvxopt_l1', basis='01', weight='unweighted', assign_algo='mag'):
        """
        Set up the analyzer: read the primitive cell, load previously saved
        calculation data and build (or reload) the cluster expansion.

        vaspdir: directory that is scanned for VASP runs by _load_data.
        prim_file: CIF file holding the primitive cell.
        calc_data_file: JSON file with previously parsed calculation data; it
              is loaded when it exists, otherwise a fresh record is started.
        ce_file: JSON file with a previously saved cluster expansion. When it
              exists, the cluster expansion, max_de and max_ew are taken from
              it and the max_de / max_ew arguments are ignored.
        ce_radius: dict {cluster size: radius}. When empty/None and no ce_file
              exists, defaults are derived from the nearest-neighbour
              distances in the primitive cell.
        max_de, max_ew: stored and later passed to EciGenerator as
              max_dielectric and max_ewald.
        sm_type, ltol, stol, angle_tol, basis: forwarded to
              ClusterExpansion.from_radii.
        solver: solver name passed to EciGenerator.
        weight: 'unweighted', 'e_above_hull' or 'e_above_comp' (see fit_ce).
        assign_algo: algorithm name passed to ChargeAssign.
        """
        self.calcdata = {}
        self.vaspdir = vaspdir
        self.calc_data_file = calc_data_file
        self.ce_file = ce_file
        self.solver = solver
        self.weight = weight
        self.sm_type = sm_type
        self.ltol = ltol
        self.stol = stol
        self.angle_tol = angle_tol
        self.basis = basis
        self.assign_algo = assign_algo

        self.prim_file = prim_file
        self.prim = CifParser(prim_file).get_structures()[0]

        # Check if charge assignments are required.
        # species_string is either a single species ("Li+") or a comma
        # separated list of "species:occupancy" pairs; keep only the species
        # part of every item.
        species_all = []
        for site in self.prim:
            for specie_occu in site.species_string.split(','):
                species_all.append(specie_occu.split(':')[0].strip())
        self.is_charged_ce = any(GetIonChg(specie) != 0 for specie in species_all)

        if os.path.isfile(calc_data_file):
            with open(calc_data_file) as Fin:
                self.calcdata = json.load(Fin)
        else:
            print('No previous calculation data found. Building new.')
            self.calcdata['prim'] = self.prim.as_dict()
            self.calcdata['compositions'] = {}

        if os.path.isfile(ce_file):
            with open(ce_file) as Fin:
                ce_dat_old = json.load(Fin)
            self.ce = ClusterExpansion.from_dict(
                ce_dat_old['cluster_expansion'])
            self.max_de = ce_dat_old['max_dielectric']
            self.max_ew = ce_dat_old['max_ewald']
            # The radius is only recorded here; the expansion itself comes
            # from ce_file.
            self.ce_radius = ce_radius

        else:
            if not ce_radius:
                # Nearest-neighbour distance of every site: distances to the
                # sites after it, plus the shortest lattice parameter standing
                # in for the distance to its own periodic image.
                lattice = self.prim.lattice
                self_image = min([lattice.a, lattice.b, lattice.c])
                d_nns = []
                for i, site1 in enumerate(self.prim):
                    d_ij = [self_image]
                    d_ij.extend(site1.distance(site2)
                                for j, site2 in enumerate(self.prim) if j > i)
                    d_nns.append(min(d_ij))
                d_nn = max(d_nns)

                # Default cluster radius
                self.ce_radius = {
                    2: d_nn * 4.0,
                    3: d_nn * 2.0,
                    4: d_nn * 2.0,
                }

            else:
                self.ce_radius = ce_radius

            self.max_de = max_de
            self.max_ew = max_ew

            # Using num_sites is a much better way to use sm.
            self.ce = ClusterExpansion.from_radii(self.prim, self.ce_radius, sm_type=self.sm_type,
                                                  ltol=self.ltol, stol=self.stol, angle_tol=self.angle_tol,
                                                  supercell_size='num_sites', use_ewald=True,
                                                  use_inv_r=False, eta=None, basis=self.basis)

    def fit_ce(self):
        """
        Inputs:
            1, data_file: name of the mson file that stores the primitive cell for the calculation,
            compisitional axis (if any), all the structures (input and relaxed), their energies 
            and compositions. (cluster_expansion object, ecis, and ground state solver informations 
            will be saved in another mson file, named as ce_file.) These structures are already 
            assigned charges, and are deduplicated.
               Recorded in one dictionary.
            2, ce_file: a file to store cluster expansion info, gs info, ecis, etc.
            3, ce_radius: Max cluster radius set up. Only required when no existing ce is present.
            4, max_deformation: parameters to set up CE.structure_matcher object (deprecated)
        Outputs:
            None. The ce_data file will be updated.

        """
        print("Loading data from {}".format(self.vaspdir))
        self._load_data()

        #        print('Loaded data presaved to calcdata.tmp')
        #        with open('calcdata.tmp','w') as tmp:
        #            json.dump(self.calcdata,tmp)

        print("Updating cluster expansion.")
        #Use crystal nearest neighbor analyzer to find nearest neighbor distance, and set cluster radius according to it.

        ValidStrs = []
        energies = []
        supmats = []
        for comp in self.calcdata['compositions']:
            for entry in self.calcdata['compositions'][comp]:
                ValidStrs.append(Structure.from_dict(
                    entry['relaxed_deformed']))
                energies.append(entry['total_energy'])
                supmats.append(entry['matrix'])

        #print('ValidStrs',ValidStrs,'len',len(ValidStrs))
        #print('energies',energies,'len',len(energies))
        ## These should already have been deduplicated

    # Fit expansion, currently only support energy/free energy expansion. If you want to expand other properties,
    # You have to write on your own.
        if self.weight == 'unweighted':
            self.ECIG=EciGenerator.unweighted(cluster_expansion=self.ce, structures=ValidStrs,\
                                     energies = energies,\
                                     max_dielectric=self.max_de, max_ewald=self.max_ew, \
                                     solver = self.solver,supercell_matrices=supmats)
        elif self.weight == 'e_above_hull':
            self.ECIG=EciGenerator.weight_by_e_above_hull(cluster_expansion=self.ce, structures=ValidStrs,\
                                     energies = energies,\
                                     max_dielectric=self.max_de, max_ewald=self.max_ew, \
                                     solver = self.solver,supercell_matrices=supmats)
        elif self.weight == 'e_above_comp':
            self.ECIG=EciGenerator.weight_by_e_above_comp(cluster_expansion=self.ce, structures=ValidStrs,\
                                     energies = energies,\
                                     max_dielectric=self.max_de, max_ewald=self.max_ew, \
                                     solver = self.solver,supercell_matrices=supmats)

        else:
            raise ValueError('Weighting option not implemented!')

        self.ECIG.generate()

        print("RMSE: {} eV/prim, num of structures: {}.".format(
            self.ECIG.rmse, len(ValidStrs)))

    def _load_data(self):
        """    
        This function parses existing vasp calculations, does mapping check, assigns charges and writes into the calc_data file 
        mentioned in previous functions. What we mean by mapping check here, is to see whether a deformed structure can be mapped
        into a supercell lattice and generates a set of correlation functions in clustersupercell.corr_from_structure.
        
        We plan to do modify corr_from_structure from using pymatgen.structurematcher to a grid matcher, which will ensure higher 
        acceptance for DFT calculations, but does not necessarily improve CE hamitonian, since some highly dipoled and deformed 
        structures might have poor DFT energy, and even SABOTAGE CE!
        """
        # Every key in self.calcdata['compositions'] is a composition, and each composition contains a list of dict entrees.
        # relaxed_structure, input_structure, magmoms, total_energy.

        _is_vasp_calc = lambda fs: 'POSCAR' in fs and 'INCAR' in fs and 'KPOINTS' in fs and 'POTCAR' in fs
        # Load VASP runs from given directories

        n_matched = 0
        n_inputs = 0
        new_unassigned_strs = []
        for root, dirs, files in os.walk(self.vaspdir):
            #A calculation directories has only 3 status:
            #accepted: calculation was successful, and already entered into calcdata.mson
            #falied: calculated but not successful, either aborted or can't be read into calcdata.mson
            #For these above two, we don't want to submit a calculation or post-process again.
            #not marked: calculation run not started or not finished yet. Since analyzer is always called
            #after runner, we don't need to worry that analyzer will find unmarked folders.

            if _is_vasp_calc(files) and (not 'accepted'
                                         in files) and (not 'failed' in files):
                print("Loading VASP run in {}".format(root))
                parent_root = os.path.join(*root.split(os.sep)[0:-1])
                parent_parent_root = os.path.join(*root.split(os.sep)[0:-2])
                with open(
                        os.path.join(parent_parent_root,
                                     'composition_by_site')) as compfile:
                    composition = json.load(compfile)
                    compstring = json.dumps(composition)

                if compstring not in self.calcdata['compositions']:
                    self.calcdata['compositions'][compstring] = []

                if not os.path.isfile(os.path.join(parent_root, 'matrix')):
                    print(
                        'Warning: matrix presave not found. Will autodetect supercell matrix using structure matcher,\
                           and will suffer from numerical errors!')
                    matrix = None
                else:
                    with open(os.path.join(parent_root, 'matrix')) as mat_file:
                        matrix = json.load(mat_file)
                #Check existence of output structure
                try:
                    relaxed_struct = Poscar.from_file(
                        os.path.join(root, 'CONTCAR')).structure
                except:
                    print('Entry {} CONTCAR can not be read. Skipping.'.format(
                        root))
                    open(os.path.join(root, 'failed'), 'a').close()
                    continue

                input_struct = Poscar.from_file(
                    os.path.join(parent_root, 'POSCAR')).structure

                #Check uniqueness
                strict_sm = StructureMatcher(stol=0.1,
                                             ltol=0.1,
                                             angle_tol=1,
                                             comparator=ElementComparator())
                _is_unique = True
                for entry in self.calcdata['compositions'][compstring]:
                    entry_struct = Structure.from_dict(
                        entry['relaxed_structure'])
                    if strict_sm.fit(entry_struct, relaxed_struct):
                        _is_unique = False
                        break
                if not _is_unique:
                    print('Entry {} alredy calculated before.'.format(root))
                    open(os.path.join(root, 'accepted'), 'a').close()
                    continue
                n_inputs += 1

                # Note: the input_struct here comes from the poscar in upper root, rather than fm.0, so
                # it is not deformed.

                # Rescale volume to that of unrelaxed structure, this will lead to a better mapping back.
                # I changed it to a rescaling tensor
                relaxed_lat_mat = np.matrix(relaxed_struct.lattice.matrix)
                input_lat_mat = np.matrix(input_struct.lattice.matrix)
                o2i_deformation = Deformation(input_lat_mat.T *
                                              relaxed_lat_mat.I.T)
                relaxed_deformed = o2i_deformation.apply_to_structure(
                    relaxed_struct)
                #print(relaxed_deformed,input_struct)

                # Assign oxidation states to Mn based on magnetic moments in OUTCAR, first check existence of OUTCAR
                try:
                    Out = Outcar(os.path.join(root, 'OUTCAR'))
                except:
                    print('Entry {} OUTCAR can not be read. Skipping.'.format(
                        root))
                    open(os.path.join(root, 'failed'), 'a').close()
                    continue

                # Get final energy from OSZICAR or Vasprun. Vasprun is better but OSZICAR is much
                # faster and works fine is you separately check for convergence, sanity of
                # magnetic moments, structure geometry
                with open(os.path.join(root, 'OUTCAR')) as outfile:
                    outcar_string = outfile.read()
                if 'reached required accuracy' not in outcar_string:
                    print(
                        'Entry {} did not converge to required accuracy. Skipping.'
                        .format(root))
                    open(os.path.join(root, 'failed'), 'a').close()
                    continue
                TotE = Oszicar(os.path.join(root, 'OSZICAR')).final_energy
                # Checking convergence
                Mag = []
                for SiteInd, Site in enumerate(relaxed_struct.sites):
                    Mag.append(np.abs(Out.magnetization[SiteInd]['tot']))

                new_entry = {}
                new_entry['input_structure'] = input_struct.as_dict()
                new_entry['relaxed_structure'] = relaxed_struct.as_dict()
                new_entry['relaxed_deformed'] = relaxed_deformed.as_dict()
                new_entry['total_energy'] = TotE
                new_entry['magmoms'] = Mag
                new_entry['matrix'] = matrix

                if os.path.isfile(os.path.join(parent_parent_root, 'axis')):
                    with open(os.path.join(parent_parent_root,
                                           'axis')) as axisfile:
                        axis = json.load(axisfile)
                    if 'axis' not in new_entry:
                        new_entry['axis'] = axis

                new_unassigned_strs.append((compstring, root, new_entry))

        if len(new_unassigned_strs) == 0:
            print('No new structures appeared. Calcdata will not be updated.')
            return

        #Charge assignment
        if self.is_charged_ce:
            relaxed_deformed_pool = []
            relaxed_strs_pool = []
            mags = []
            roots = []
            energies = []
            comps = []
            inputs = []
            mats = []
            if 'axis' in new_unassigned_strs[0][2]:
                axis = []
            for compstring, root, new_entry in new_unassigned_strs:
                # Out=Outcar(os.path.join(root,'OUTCAR'))
                Mag = new_entry['magmoms']
                relaxed_struct = Structure.from_dict(
                    new_entry['relaxed_structure'])
                relaxed_deformed = Structure.from_dict(
                    new_entry['relaxed_deformed'])
                # Throw out structures where oxidation states don't make charge balanced.

                mags.append(Mag)
                roots.append(root)
                relaxed_strs_pool.append(relaxed_struct)
                relaxed_deformed_pool.append(relaxed_deformed)
                comps.append(compstring)
                inputs.append(Structure.from_dict(
                    new_entry['input_structure']))
                energies.append(new_entry['total_energy'])
                mats.append(new_entry['matrix'])
                if 'axis' in new_entry:
                    axis.append(new_entry['axis'])

            CA = ChargeAssign(relaxed_strs_pool, mags, algo=self.assign_algo)
            relaxed_strs_assigned = CA.assigned_structures
            relaxed_deformed_assigned = CA.extend_assignments(
                relaxed_deformed_pool, mags)

            for i in range(len(inputs)):
                if relaxed_strs_assigned[
                        i] is not None and relaxed_deformed_assigned[
                            i] is not None:
                    # Checking whether structure can be mapped to corr function.
                    # This is out deformation tolerance.
                    try:
                        if mats[i] is not None:
                            cesup = self.ce.supercell_from_matrix(mats[i])
                            corr = cesup.corr_from_structure(
                                relaxed_deformed_assigned[i])
                        else:
                            corr = self.ce.corr_from_structure(
                                relaxed_deformed_assigned[i])
                    except:
                        print(
                            "Entry {} too far from original lattice. Skipping."
                            .format(roots[i]))
                        open(os.path.join(roots[i], 'failed'), 'a').close()
                        continue

                    assigned_entry = {}
                    assigned_entry['input_structure'] = inputs[i].as_dict()
                    assigned_entry[
                        'relaxed_structure'] = relaxed_strs_assigned[
                            i].as_dict()
                    assigned_entry[
                        'relaxed_deformed'] = relaxed_deformed_assigned[
                            i].as_dict()
                    assigned_entry['matrix'] = mats[i]
                    assigned_entry['total_energy'] = energies[i]
                    assigned_entry['magmoms'] = mags[i]
                    if 'axis' in new_unassigned_strs[0][2]:
                        assigned_entry['axis'] = axis[i]
                    self.calcdata['compositions'][comps[i]].append(
                        assigned_entry)
                    print('Entry {} accepted!'.format(roots[i]))
                    open(os.path.join(roots[i], 'accepted'), 'a').close()
                    n_matched += 1

                else:
                    print("Entry {} can not be assigned. Skipping.".format(
                        roots[i]))
                    open(os.path.join(roots[i], 'failed'), 'a').close()
                    continue
        else:
            print('Doing non charged ce.')
            for compstring, root, new_entry in new_unassigned_strs:
                # Checking whether structure can be mapped to corr function.
                # This is out deformation tolerance.
                try:
                    if new_entry['matrix'] is not None:
                        cesup = self.ce.supercell_from_matrix(
                            new_entry['matrix'])
                        corr = cesup.corr_from_structure(
                            Structure.from_dict(new_entry['relaxed_defromed']))
                    else:
                        corr = self.ce.corr_from_structure(
                            Structure.from_dict(new_entry['relaxed_defromed']))
                except:
                    print("Entry {} too far from original lattice. Skipping.".
                          format(root))
                    open(os.path.join(root, 'failed'), 'a').close()
                    continue

                self.calcdata['compositions'][compstring].append(new_entry)
                open(os.path.join(root, 'accepted'), 'a').close()
                n_matched += 1
        # Data already deduplicated!

        print(
            '{}/{} structures matched in this run. Parsed vasp data will be saved into {}.'
            .format(n_matched, n_inputs, self.calc_data_file))

    def write_files(self):
        """
        Save the calculation data as JSON to ``self.calc_data_file`` and the
        ECI generator ``self.ECIG`` to ``self.ce_file``.
        """
        serialized_calcdata = json.dumps(self.calcdata)
        with open(self.calc_data_file, 'w') as data_handle:
            data_handle.write(serialized_calcdata)
        # For any msonable, use dumpfn to save your time!
        dumpfn(self.ECIG, self.ce_file)

    @classmethod
    def from_settings(cls, setting_file='analyzer.mson'):
        if os.path.isfile(setting_file):
            with open(setting_file, 'r') as fs:
                settings = json.load(fs)
        else:
            settings = {}
        return cls.from_dict(settings)

    @classmethod
    def from_dict(cls, settings):
        if 'vaspdir' in settings: vaspdir = settings['vaspdir']
        else: vaspdir = 'vasp_run'

        if 'prim_file' in settings: prim_file = settings['prim_file']
        else: prim_file = 'prim.cif'

        if 'calc_data_file' in settings:
            calc_data_file = settings['calc_data_file']
        else:
            calc_data_file = 'calcdata.mson'

        if 'ce_file' in settings: ce_file = settings['ce_file']
        else: ce_file = 'ce.mson'

        if 'ce_radius' in settings: ce_radius = settings['ce_radius']
        else: ce_radius = None

        if 'max_de' in settings: max_de = settings['max_de']
        else: max_de = 100

        if 'max_ew' in settings: max_ew = settings['max_ew']
        else: max_ew = 3

        if 'sm_type' in settings: sm_type = settings['sm_type']
        else: sm_type = 'pmg_sm'

        if 'ltol' in settings: ltol = settings['ltol']
        else: ltol = 0.2

        if 'stol' in settings: stol = settings['stol']
        else: stol = 0.15

        if 'angle_tol' in settings: angle_tol = settings['angle_tol']
        else: angle_tol = 5

        if 'solver' in settings: solver = settings['solver']
        else: solver = 'cvxopt_l1'

        if 'basis' in settings: basis = settings['basis']
        else: basis = '01'

        if 'weight' in settings: weight = settings['weight']
        else: weight = 'unweighted'

        if 'assign_algo' in settings: assign_algo = settings['assign_algo']
        else: assign_algo = 'mag'

        return cls(vaspdir=vaspdir,prim_file=prim_file,calc_data_file=calc_data_file,ce_file = ce_file, ce_radius=ce_radius,\
                   max_de = max_de, max_ew = max_ew, sm_type = sm_type, ltol = ltol, stol = stol, angle_tol = angle_tol,\
                   solver = solver, basis = basis, weight = weight, assign_algo=assign_algo)

    def as_dict(self):
        settings = {}
        settings['vaspdir'] = self.vaspdir
        settings['prim_file'] = self.prim_file
        settings['calc_data_file'] = self.calc_data_file
        settings['ce_file'] = self.ce_file
        settings['ce_radius'] = self.ce_radius
        settings['max_de'] = self.max_de
        settings['max_ew'] = self.max_ew
        settings['sm_type'] = self.sm_type
        settings['ltol'] = self.ltol
        settings['stol'] = self.stol
        settings['angle_tol'] = self.angle_tol
        settings['solver'] = self.solver
        settings['basis'] = self.basis
        settings['weight'] = self.weight
        settings['assign_algo'] = self.assign_algo
        return settings

    def write_settings(self, settings_file='analyzer.mson'):
        print('Writing anlyzer settings to {}'.format(settings_file))
        with open(settings_file, 'w') as fout:
            json.dump(self.as_dict(), fout)
Exemple #5
0
    def _assimilate_from_cif(self, cif_path):
        """
        Parse a CIF file into a structure plus metadata.

        Warnings raised while parsing are logged and collected under
        ``cif_meta['cifwarnings']``.

        :param cif_path: string, path to the cif file; the file name up to its
            first dot is used as the file ID.
        :return: tuple ``(struc, cif_meta, atomate_meta)``. When the structure
            cannot be built (``ValueError`` from the parser) the error is
            logged and appended to the 'Error_Record' file, and ``struc`` and
            ``atomate_meta`` are returned as ``None`` so that the caller can
            skip the database insert.
        """
        file_ID = cif_path.split('/')[-1].split(".")[0]

        cif_meta = {}

        def _record_warnings(caught):
            # Log the captured warnings and keep their messages in cif_meta.
            for warn in caught:
                cif_meta.setdefault('cifwarnings', []).append(str(warn.message))
                logger.warning('{}: {}'.format(file_ID, warn.message))

        # capture any warnings generated by parsing cif file
        with warnings.catch_warnings(record=True) as w:
            cif_parser = CifParser(cif_path)
            _record_warnings(w)

        cif_dict = cif_parser.as_dict()
        orig_id = list(cif_dict.keys())[0]
        easy_dict = cif_dict[orig_id]

        if '_chemical_name_mineral' in easy_dict:
            cif_meta['min_name'] = easy_dict['_chemical_name_mineral']
        if '_chemical_name_systematic' in easy_dict:
            cif_meta['chem_name'] = easy_dict['_chemical_name_systematic']
        if '_cell_measurement_pressure' in easy_dict:
            cif_meta['pressure'] = float(
                easy_dict['_cell_measurement_pressure']) / 1000
        else:
            cif_meta['pressure'] = .101325

        # Defaults for the failure path, so that the return statement below
        # never refers to unbound names.
        struc = None
        atomate_meta = None

        with warnings.catch_warnings(record=True) as w:
            try:
                struc = cif_parser.get_structures()[0]
            except ValueError as err:
                # if cif parsing raises error, write icsd_id to Error_Record and do NOT add structure to mongo database
                logger.error(
                    file_ID + ': {}'.format(err) +
                    "\nDid not insert structure into Mongo Collection")
                with open('Error_Record', 'a') as err_rec:
                    err_rec.write(str(file_ID) + ': {}\n'.format(err))
            else:
                cif_meta['references'] = self.bibtex_from_cif(cif_path)
                cif_meta['history'] = [{
                    'name': 'ICSD',
                    'url': 'https://icsd.fiz-karlsruhe.de/',
                    'description': {
                        'id': file_ID
                    }
                }]

                atomate_meta = get_meta_from_structure(struc)

            # unfortunately any warnings are logged after any errors. Not too big of an issue
            _record_warnings(w)

        return (struc, cif_meta, atomate_meta)
Exemple #6
0
    def assimilate(self,
                   path,
                   dbhost='localhost',
                   dbport=27017,
                   dbname='ICSD',
                   collection_name='ICSD_files',
                   store_mongo=True):
        """
        Assimilate the cif file in a directory path into a data dictionary
        and, optionally, upsert that dictionary into a Mongo collection.

        The last component of ``path`` must be an integer id, and the cif file
        is read from ``<path>/<id>.cif``. Warnings raised while parsing are
        logged and collected under ``data['cifwarnings']``. If building the
        structure raises a ``ValueError`` the failure is logged, appended to
        the file ``Error_Record`` in the current working directory, and
        nothing is written to Mongo.

        The original note for this method states that, because of the quirky
        nature of Python's multiprocessing, the object must support
        pymatgen's as_dict() for parallel processing.

        Args:
            path: directory path whose last component is the integer id
            dbhost: host passed to MongoClient
            dbport: port passed to MongoClient
            dbname: name of the database to write to
            collection_name: name of the collection to write to
            store_mongo: if True, upsert the result keyed on ``icsd_id``
        Returns:
            dict with ``icsd_id`` and ``pressure`` plus, when present or
            successfully derived, ``min_name``, ``chem_name``,
            ``cifwarnings``, ``snl`` and the structure metadata fields
        """
        # The listing itself is unused; the call is kept so that an invalid
        # directory still fails here, before any parsing is attempted.
        os.listdir(path)

        # normpath drops a trailing separator, which would otherwise leave an
        # empty id and make int() fail.
        file_ID = os.path.basename(os.path.normpath(path))
        print(file_ID)
        data = {'icsd_id': int(file_ID)}

        cif_path = os.path.join(path, file_ID + '.cif')

        def record_warnings(caught):
            # Log every captured warning and keep its text in the result.
            for warn in caught:
                data.setdefault('cifwarnings', []).append(str(warn.message))
                logger.warning('{}: {}'.format(file_ID, warn.message))

        # capture any warnings generated by parsing cif file
        with warnings.catch_warnings(record=True) as w:
            cif_parser = CifParser(cif_path)
            record_warnings(w)

        # Only the first data block of the cif file is inspected.
        cif_dict = cif_parser.as_dict()
        orig_id = list(cif_dict.keys())[0]
        easy_dict = cif_dict[orig_id]

        if '_chemical_name_mineral' in easy_dict:
            data['min_name'] = easy_dict['_chemical_name_mineral']
        if '_chemical_name_systematic' in easy_dict:
            data['chem_name'] = easy_dict['_chemical_name_systematic']
        if '_cell_measurement_pressure' in easy_dict:
            # NOTE(review): presumably converts kPa to MPa - confirm units.
            data['pressure'] = float(
                easy_dict['_cell_measurement_pressure']) / 1000
        else:
            # Fallback when the cif file reports no pressure.
            data['pressure'] = .101325

        with warnings.catch_warnings(record=True) as w:
            try:
                struc = cif_parser.get_structures()[0]
            except ValueError as err:
                # if cif parsing raises error, write icsd_id to Error_Record
                # and do NOT add structure to mongo database
                logger.error(
                    file_ID + ': {}'.format(err) +
                    "\nDid not insert structure into Mongo Collection")
                with open('Error_Record', 'a') as err_rec:
                    err_rec.write(str(file_ID) + ': {}\n'.format(err))
            else:
                authors = 'Donny Winston<*****@*****.**>, Joseph Palakapilly<*****@*****.**>'
                references = self.bibtex_from_cif(cif_path)
                history = [{
                    'name': 'ICSD',
                    'url': 'https://icsd.fiz-karlsruhe.de/',
                    'description': {
                        'icsd_id': file_ID
                    }
                }]
                snl = StructureNL(struc,
                                  authors=authors,
                                  references=references,
                                  history=history)
                data['snl'] = snl.as_dict()

                meta = get_meta_from_structure(struc)
                data['nsites'] = meta['nsites']
                data['elements'] = meta['elements']
                data['nelements'] = meta['nelements']
                data['formula'] = meta['formula']
                data['formula_reduced'] = meta['formula_pretty']
                data['formula_reduced_abc'] = meta['formula_reduced_abc']
                data['formula_anonymous'] = meta['formula_anonymous']
                data['chemsys'] = meta['chemsys']
                data['is_valid'] = meta['is_valid']
                data['is_ordered'] = meta['is_ordered']

            # unfortunately any warnings are logged after any errors. Not too
            # big of an issue
            record_warnings(w)

        # 'snl' is only present when the structure was built successfully.
        if store_mongo and 'snl' in data:
            # Open the client only when there is something to write, and
            # always close it so repeated calls do not leak connections.
            client = MongoClient(dbhost, dbport)
            try:
                col = client[dbname][collection_name]
                col.update_one({'icsd_id': data['icsd_id']}, {'$set': data},
                               upsert=True)
            finally:
                client.close()

        return data