def __init__(self, lambda_table=None, alpha=-5):
    """
    Load the lambda table and precompute the normalisation terms.

    Args:
        lambda_table: Iterable of rows of the form
            ``[specie_string_1, specie_string_2, lambda_value]``. When it is
            None the table is read from ``data/lambda.json`` located next to
            this module.
        alpha: Value stored on ``self.alpha``.
            (presumably the lambda used by ``get_lambda`` for pairs missing
            from the table - confirm against ``get_lambda``)
    """
    if lambda_table is None:
        table_path = os.path.join(
            os.path.dirname(__file__), 'data', 'lambda.json')
        with open(table_path) as handle:
            lambda_table = json.load(handle)
    self._lambda_table = lambda_table

    self.alpha = alpha
    self._l = {}
    self.species = set()
    # Map each unordered specie pair to its lambda; rows that mention the
    # 'D1+' entry are left out.
    for entry in self._lambda_table:
        if 'D1+' in entry:
            continue
        first = Specie.from_string(entry[0])
        second = Specie.from_string(entry[1])
        self.species.update((first, second))
        self._l[frozenset([first, second])] = float(entry[2])

    # Z sums exp(lambda) over every ordered pair of known species; each
    # pair contributes half of its weight to the px entry of both members.
    self.Z = 0
    self._px = defaultdict(float)
    for first, second in itertools.product(self.species, repeat=2):
        weight = math.exp(self.get_lambda(first, second))
        self._px[first] += weight / 2
        self._px[second] += weight / 2
        self.Z += weight
def __init__(self, lambda_table=None, alpha=-5):
    """
    Build the pair -> lambda map and the partition sums Z and px.

    Args:
        lambda_table: Rows of ``[specie_1, specie_2, lambda]`` where the two
            species are strings understood by ``Specie.from_string``. If None,
            the rows come from the bundled ``data/lambda.json`` file.
        alpha: Kept as ``self.alpha``.
            (NOTE(review): looks like the default lambda for unknown pairs;
            verify in ``get_lambda``)
    """
    if lambda_table is not None:
        rows = lambda_table
    else:
        here = os.path.dirname(__file__)
        with open(os.path.join(here, 'data', 'lambda.json')) as table_file:
            rows = json.load(table_file)
    self._lambda_table = rows

    # build map of specie pairs to lambdas
    self.alpha = alpha
    self._l = {}
    self.species = set()
    for row in rows:
        if 'D1+' not in row:
            pair = [Specie.from_string(row[0]), Specie.from_string(row[1])]
            for member in pair:
                self.species.add(member)
            self._l[frozenset(pair)] = float(row[2])

    # create Z and px
    self.Z = 0
    self._px = defaultdict(float)
    for sp_a, sp_b in itertools.product(self.species, repeat=2):
        value = math.exp(self.get_lambda(sp_a, sp_b))
        half = value / 2
        self._px[sp_a] += half
        self._px[sp_b] += half
        self.Z += value
def test_to_from_string(self):
    """Check str() output and from_string() parsing for species with spin."""
    iron = Specie("Fe", 3, {"spin": 5})
    self.assertEqual(str(iron), "Fe3+spin=5")
    parsed_iron = Specie.from_string("Fe3+spin=5")
    self.assertEqual(parsed_iron.spin, 5)

    # A zero oxidation state is written with an explicit "0+".
    moly = Specie("Mo", 0, {"spin": 5})
    self.assertEqual(str(moly), "Mo0+spin=5")
    parsed_moly = Specie.from_string("Mo0+spin=4")
    self.assertEqual(parsed_moly.spin, 4)
def test_to_from_string(self):
    """
    Check str() output and from_string() parsing for species with a spin
    property, including a specie that has no oxidation state.
    """
    iron = Specie("Fe", 3, {"spin": 5})
    self.assertEqual(str(iron), "Fe3+,spin=5")
    parsed_iron = Specie.from_string("Fe3+,spin=5")
    self.assertEqual(parsed_iron.spin, 5)

    # A zero oxidation state is written with an explicit "0+".
    moly = Specie("Mo", 0, {"spin": 5})
    self.assertEqual(str(moly), "Mo0+,spin=5")
    parsed_moly = Specie.from_string("Mo0+,spin=4")
    self.assertEqual(parsed_moly.spin, 4)

    # Without an oxidation state the string is just element + properties.
    expected = Specie("Fe", oxidation_state=None, properties={"spin": 5})
    parsed = Specie.from_string("Fe,spin=5")
    self.assertEqual(expected, parsed)
def test_reading_vasprun_xml(self):
    """
    Run the diffusivity error analysis on the LATP MD runs 10-29 and compare
    the summary values against stored reference numbers.
    """
    site_distance = 3.2
    specie_string = 'Li+'
    specie = Specie.from_string(specie_string)

    run_files = []
    for run_id in range(10, 30):
        run_files.append(os.path.join(
            test_dir, 'latp_md/RUN_{}/vasprun.xml.gz'.format(run_id)))

    half_sq_distance = 0.5 * site_distance * site_distance
    spec = {
        'lower_bound': half_sq_distance,
        'upper_bound': 0.5,
        'minimum_msd_diff': half_sq_distance,
    }
    analyzer = DiffusivityAnalyzer.from_files(
        run_files, str(specie.element), spec_dict=spec)
    error_analysis = ErrorAnalysisFromDiffusivityAnalyzer(
        analyzer, site_distance=site_distance)
    summary_info = error_analysis.get_summary_dict(
        oxidized_specie=specie_string)

    # (summary key, reference value), checked in this order
    references = [
        ('diffusivity', 7.60175023036e-05),
        ('diffusivity_relative_standard_deviation', 0.382165427856),
        ('n_jump', 100.48841284),
        ('conversion_factor', 7675326.58284),
        ('temperature', 1500),
        ('conductivity', 583.45915619213463),
    ]
    for key, reference in references:
        self.assertAlmostEqual(summary_info[key], reference, places=5)
def get_conversion_factor(structure, specie, temperature):
    """
    Conversion factor to convert between cm^2/s diffusivity measurements and
    mS/cm conductivity measurements based on number of atoms of diffusing
    species.

    :param structure (Structure): Input structure.
    :param specie (string/specie): Diffusing species (a Specie object or a
        string such as "Li+"); it must carry an oxidation state.
    :param temperature (float): Temperature of the diffusion run in Kelvin.
    :return: Conversion factor.
        Conductivity (in mS/cm) = Conversion Factor * Diffusivity (in cm^2/s)
    :raises Exception: if ``specie`` cannot be parsed, has no oxidation
        state, or does not occur in the structure composition.
    """
    oxi_message = "Please provide oxidation decorated specie, like Li+, O2-"
    # isinstance (rather than an exact type match) also accepts subclasses.
    if isinstance(specie, Specie):
        df_sp = specie
    else:
        try:
            df_sp = Specie.from_string(specie)
        except Exception:
            raise Exception(oxi_message)

    z = df_sp.oxi_state
    if z is None:
        # Without a charge z**2 below is undefined; report it the same way
        # as an unparsable specie instead of failing with a TypeError.
        raise Exception(oxi_message)

    # The first composition key tells whether the structure is oxidation
    # decorated (keys are Specie) or not (keys are plain elements).
    el, _ = list(structure.composition.items())[0]
    if isinstance(el, Specie):  # oxidation decorated structure
        n = structure.composition[specie]
    else:
        n = structure.composition[str(df_sp.element)]

    if n == 0:
        raise Exception("No specie {} in the structure composition: {}".format(
            specie, structure.composition))

    vol = structure.volume * 1e-24  # units cm^3
    N_A = 6.022140857e+23  # Avogadro constant, 1/mol
    e = 1.6021766208e-19  # elementary charge, C
    R = 8.3144598  # gas constant, J/(mol K)
    return 1000 * n / (vol * N_A) * z ** 2 * (N_A * e) ** 2 \
        / (R * temperature)
def get_summary_dict(self, oxidized_specie=None):
    """
    A summary of information

    :param oxidized_specie (str): specie string with oxidation state. If
        provided, or if ``self.specie`` can itself be parsed by
        ``Specie.from_string``, conductivity is calculated from the
        diffusivity through ``get_conversion_factor``.
    :return: dict of diffusion information
        keys: diffusivity, diffusivity_components, specie, step_skip,
        temperature, msd, msd_component, dt, time_intervals_number,
        spec_dict, drift_maximum, max_framework_displacement.
        When a specie with oxidation state is available the dict also has
        conductivity, conductivity_components, conversion_factor and
        oxidation_state.
    """
    d = {
        "diffusivity": self.diffusivity,
        "diffusivity_components": self.diffusivity_components,
        "specie": self.specie,
        "step_skip": self.step_skip,
        "temperature": self.temperature,
        "msd": self.msd,
        "msd_component": self.msd_component,
        "dt": self.dt,
        "time_intervals_number": self.time_intervals_number,
        "spec_dict": self.spec_dict,
        "drift_maximum": self.drift_maximum,
        "max_framework_displacement": self.max_framework_displacement
    }

    df_sp = None
    if oxidized_specie:
        # An explicit specie is required to parse; errors propagate.
        df_sp = Specie.from_string(oxidized_specie)
    else:
        # Best effort: self.specie may be a plain element symbol, in which
        # case the conductivity entries are simply left out.
        try:
            df_sp = Specie.from_string(self.specie)
        except Exception:
            df_sp = None

    if df_sp is not None:
        factor = get_conversion_factor(self.structure, df_sp,
                                       self.temperature)
        d['conductivity'] = factor * self.diffusivity
        d['conductivity_components'] = factor * self.diffusivity_components
        d['conversion_factor'] = factor
        d['oxidation_state'] = df_sp.oxi_state
    return d
def __init__(self, lambda_table=None, alpha=-5):
    """
    Build the pair -> lambda map and the partition functions Z and px.

    Args:
        lambda_table: Rows of ``[specie_1, specie_2, lambda]``. A falsy value
            (None or empty) makes the constructor read ``data/lambda.json``
            from this module's directory instead.
        alpha: Lambda used for specie pairs that are not in the table.
    """
    # store the input table (as passed in) for the to_dict method
    self._lambda_table = lambda_table

    if not lambda_table:
        json_path = os.path.join(
            os.path.dirname(__file__), 'data', 'lambda.json')
        with open(json_path) as table_file:
            lambda_table = json.load(table_file)

    # build map of specie pairs to lambdas, ignoring rows with 'D1+'
    pair_lambdas = {}
    for row in lambda_table:
        if row[0] == 'D1+' or row[1] == 'D1+':
            continue
        first = Specie.from_string(row[0])
        second = Specie.from_string(row[1])
        pair_lambdas[frozenset([first, second])] = float(row[2])

    self._lambda = pair_lambdas
    self._alpha = alpha

    # create the partition functions Z and px
    known_species = set()
    for pair in self._lambda.keys():
        known_species.update(pair)

    px = {sp: 0. for sp in known_species}
    Z = 0
    for first, second in itertools.product(known_species, repeat=2):
        pair_lambda = self._lambda.get(
            frozenset([first, second]), self._alpha)
        value = math.exp(pair_lambda)
        # not sure why the factor of 2 is here but it matches up
        # with BURP. BURP may actually be missing a factor of 2,
        # but it doesn't have a huge effect
        px[first] += value / 2
        px[second] += value / 2
        Z += value

    self._Z = Z
    self._px = px
    self.species_list = list(known_species)
def find_connected_atoms(struct, tolerance=0.45, ldict=JmolNN().el_radius):
    """
    Finds the list of bonded atoms.

    Author: "Gowoon Cheon"
    Email: "*****@*****.**"

    Args:
        struct (Structure): Input structure
        tolerance: length in angstroms used in finding bonded atoms. Two atoms
            are considered bonded if (distance between atoms 1 and 2) <
            (radius of atom 1) + (radius of atom 2) + (tolerance).
            Default value = 0.45, the value used by JMol and Cheon et al.
        ldict: dictionary of radii, keyed by species/element string, used in
            finding bonded atoms. Values from JMol are used as default

    Returns:
        (np.ndarray): A numpy array of shape (number of bonded pairs, 2); each
        row of is of the form [atomi, atomj]. atomi and atomj are the indices
        of the atoms in the input structure (atomi < atomj). If any image of
        atomj is bonded to atomi with periodic boundary conditions,
        [atomi, atomj] is included in the list. If atomi is bonded to multiple
        images of atomj, it is only counted once.
    """
    n_atoms = len(struct.species)
    fc = np.array(struct.frac_coords)
    species = list(map(str, struct.species))
    # in case of charged species: fall back to the bare element symbol
    for i, item in enumerate(species):
        if item not in ldict:
            species[i] = str(Specie.from_string(item).element)
    latmat = struct.lattice.matrix
    # the home cell plus its 26 neighbouring cells
    cell_shifts = list(itertools.product([0, 1, -1], [0, 1, -1], [0, 1, -1]))

    connected_list = []
    for i in range(n_atoms):
        for j in range(i + 1, n_atoms):
            max_bond_length = ldict[species[i]] + ldict[species[j]] + tolerance
            for move_cell in cell_shifts:
                frac_diff = fc[j] + move_cell - fc[i]
                distance_ij = np.dot(latmat.T, frac_diff)
                if np.linalg.norm(distance_ij) < max_bond_length:
                    connected_list.append([i, j])
                    # one bonded image is enough; skip the remaining cells
                    break
    return np.array(connected_list)
def find_connected_atoms(struct, tolerance=0.45, ldict=JmolNN().el_radius):
    """
    Finds the list of bonded atoms.

    Author: "Gowoon Cheon"
    Email: "*****@*****.**"

    Args:
        struct (Structure): Input structure
        tolerance: length in angstroms used in finding bonded atoms. Two atoms
            are considered bonded if (distance between atoms 1 and 2) <
            (radius of atom 1) + (radius of atom 2) + (tolerance).
            Default value = 0.45, the value used by JMol and Cheon et al.
        ldict: dictionary of radii, keyed by species/element string, used in
            finding bonded atoms. Values from JMol are used as default

    Returns:
        (np.ndarray): A numpy array of shape (number of bonded pairs, 2); each
        row of is of the form [atomi, atomj]. atomi and atomj are the indices
        of the atoms in the input structure (atomi < atomj). If any image of
        atomj is bonded to atomi with periodic boundary conditions,
        [atomi, atomj] is included in the list. If atomi is bonded to multiple
        images of atomj, it is only counted once.
    """
    n_atoms = len(struct.species)
    fc = np.array(struct.frac_coords)
    species = list(map(str, struct.species))
    # in case of charged species: fall back to the bare element symbol
    for idx, label in enumerate(species):
        if label not in ldict:
            species[idx] = str(Specie.from_string(label).element)
    lattice_t = struct.lattice.matrix.T
    # the home cell plus its 26 neighbouring cells
    cell_shifts = list(itertools.product([0, 1, -1], [0, 1, -1], [0, 1, -1]))

    connected_list = []
    for i in range(n_atoms):
        for j in range(i + 1, n_atoms):
            max_bond_length = ldict[species[i]] + ldict[species[j]] + tolerance
            # any() stops at the first image of j that is close enough to i,
            # so a pair is recorded at most once.
            bonded = any(
                np.linalg.norm(
                    np.dot(lattice_t, fc[j] + shift - fc[i])) < max_bond_length
                for shift in cell_shifts)
            if bonded:
                connected_list.append([i, j])
    return np.array(connected_list)
def find_connected_atoms(struct, tolerance=0.45, ldict=JmolNN().el_radius):
    """
    Finds bonded atoms and returns a adjacency matrix of bonded atoms.

    Author: "Gowoon Cheon"
    Email: "*****@*****.**"

    Args:
        struct (Structure): Input structure
        tolerance: length in angstroms used in finding bonded atoms. Two atoms
            are considered bonded if (distance between atoms 1 and 2) <
            (radius of atom 1) + (radius of atom 2) + (tolerance).
            Default value = 0.45, the value used by JMol and Cheon et al.
        ldict: dictionary of radii, keyed by species/element string, used in
            finding bonded atoms. Values from JMol are used as default

    Returns:
        (np.ndarray): A numpy array of shape (number of atoms, number of
        atoms); If any image of atom j is bonded to atom i with periodic
        boundary conditions, the matrix elements [atom i, atom j] and
        [atom j, atom i] are 1, otherwise 0. The diagonal is left at 0.
    """
    # pylint: disable=E1136
    n_atoms = len(struct.species)
    fc = np.array(struct.frac_coords)
    # fc_copy[k] holds atom k's fractional coordinates repeated once per
    # periodic image: shape (n_atoms, 3, 27)
    fc_copy = np.repeat(fc[:, :, np.newaxis], 27, axis=2)
    # the 27 cell translations as columns: shape (3, 27)
    neighbors = np.array(
        list(itertools.product([0, 1, -1], [0, 1, -1], [0, 1, -1]))).T
    # coordinates of all 27 images of every atom (broadcast over atoms)
    fc_diff = fc_copy - neighbors[np.newaxis, :, :]
    species = list(map(str, struct.species))
    # in case of charged species: fall back to the bare element symbol
    for i, item in enumerate(species):
        if item not in ldict:
            species[i] = str(Specie.from_string(item).element)
    latmat = struct.lattice.matrix
    connected_matrix = np.zeros((n_atoms, n_atoms))

    for i in range(n_atoms):
        for j in range(i + 1, n_atoms):
            max_bond_length = ldict[species[i]] + ldict[species[j]] + tolerance
            frac_diff = fc_diff[j] - fc_copy[i]
            distance_ij = np.dot(latmat.T, frac_diff)
            # bonded if at least one of the 27 images is within range
            if np.any(np.linalg.norm(distance_ij, axis=0) < max_bond_length):
                connected_matrix[i, j] = 1
                connected_matrix[j, i] = 1
    return connected_matrix
def find_connected_atoms(struct, tolerance=0.45, ldict=JmolNN().el_radius):
    """
    Finds bonded atoms and returns a adjacency matrix of bonded atoms.

    Author: "Gowoon Cheon"
    Email: "*****@*****.**"

    Args:
        struct (Structure): Input structure
        tolerance: length in angstroms used in finding bonded atoms. Two atoms
            are considered bonded if (distance between atoms 1 and 2) <
            (radius of atom 1) + (radius of atom 2) + (tolerance).
            Default value = 0.45, the value used by JMol and Cheon et al.
        ldict: dictionary of radii, keyed by species/element string, used in
            finding bonded atoms. Values from JMol are used as default

    Returns:
        (np.ndarray): A numpy array of shape (number of atoms, number of
        atoms); If any image of atom j is bonded to atom i with periodic
        boundary conditions, the matrix elements [atom i, atom j] and
        [atom j, atom i] are 1, otherwise 0. The diagonal is left at 0.
    """
    n_atoms = len(struct.species)
    fc = np.array(struct.frac_coords)
    # the 27 cell translations (home cell + 26 neighbours) as columns
    shifts = np.array(
        list(itertools.product([0, 1, -1], [0, 1, -1], [0, 1, -1]))).T
    species = list(map(str, struct.species))
    # in case of charged species: fall back to the bare element symbol
    for idx, label in enumerate(species):
        if label not in ldict:
            species[idx] = str(Specie.from_string(label).element)
    lattice_t = struct.lattice.matrix.T
    connected_matrix = np.zeros((n_atoms, n_atoms))

    for i in range(n_atoms):
        site_i = fc[i][:, np.newaxis]
        for j in range(i + 1, n_atoms):
            max_bond_length = ldict[species[i]] + ldict[species[j]] + tolerance
            # fractional vectors from atom i to every image of atom j: (3, 27)
            frac_diff = fc[j][:, np.newaxis] - shifts - site_i
            distances = np.linalg.norm(np.dot(lattice_t, frac_diff), axis=0)
            # bonded if at least one of the 27 images is within range
            if np.any(distances < max_bond_length):
                connected_matrix[i, j] = 1
                connected_matrix[j, i] = 1
    return connected_matrix
def _species_from_bondstr(self, bondstr):
    """
    Turn a bond string into a tuple of species objects.

    Args:
        bondstr (str): A string representing a bond between elements or
            species, or a combination of the two, with the parts separated
            by ``self.token``. For example, "Cl- - Cs+".

    Returns:
        ((Species)): A tuple of pymatgen Species objects, one per part of
            ``bondstr`` and in the order the parts appear in it.
    """
    def as_specie(label):
        # Labels that Specie.from_string rejects with ValueError are built
        # from a dict with oxidation state 0 instead.
        try:
            return Specie.from_string(label)
        except ValueError:
            return Specie.from_dict({'element': label, 'oxidation_state': 0})

    return tuple(as_specie(part) for part in bondstr.split(self.token))
def oxi_state_guesses(self, oxi_states_override=None, target_charge=0,
                      all_oxi_states=False, max_sites=None):
    """
    Checks if the composition is charge-balanced and returns back all
    charge-balanced oxidation state combinations. Composition must have
    integer values. Note that more num_atoms in the composition gives
    more degrees of freedom. e.g., if possible oxidation states of
    element X are [2,4] and Y are [-3], then XY is not charge balanced
    but X2Y2 is. Results are returned from most to least probable based
    on ICSD statistics. Use max_sites to improve performance if needed.

    Args:
        oxi_states_override (dict): dict of str->list to override an
            element's common oxidation states, e.g. {"V": [2,3,4,5]}
        target_charge (int): the desired total charge on the structure.
            Default is 0 signifying charge balance.
        all_oxi_states (bool): if True, an element's candidate states are
            Element.oxidation_states. Otherwise they are
            Element.icsd_oxidation_states, falling back to
            Element.oxidation_states when that list is empty. Note that
            the full oxidation state list is *very* inclusive and can
            produce nonsensical results.
        max_sites (int): if possible, will reduce Compositions to at most
            this many many sites to speed up oxidation state guesses. Set
            to -1 to just reduce fully.

    Returns:
        A list of dicts - each dict reports an element symbol and average
            oxidation state across all sites in that composition. If the
            composition is not charge balanced, an empty list is returned.

    Raises:
        ValueError: if the composition cannot be reduced to max_sites, or
            if it contains non-integer amounts.
    """
    comp = self.copy()

    # reduce Composition if necessary
    if max_sites == -1:
        comp = self.reduced_composition

    elif max_sites and comp.num_atoms > max_sites:
        reduced_comp, reduced_factor = \
            self.get_reduced_composition_and_factor()
        if reduced_factor > 1:
            reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms))
            comp = reduced_comp  # as close to max_sites as possible
        if comp.num_atoms > max_sites:
            raise ValueError("Composition {} cannot accommodate max_sites "
                             "setting!".format(comp))

    # Load prior probabilities of oxidation states, used to rank solutions.
    # The table is cached on the Composition class after the first call.
    if not Composition.oxi_prob:
        module_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)))
        all_data = loadfn(os.path.join(module_dir, "..",
                                       "analysis", "icsd_bv.yaml"))
        Composition.oxi_prob = {Specie.from_string(sp): data
                                for sp, data in
                                all_data["occurrence"].items()}

    oxi_states_override = oxi_states_override or {}

    # assert: Composition only has integer amounts
    if not all(amt == int(amt) for amt in comp.values()):
        raise ValueError("Charge balance analysis requires integer "
                         "values in Composition!")

    # for each element, determine all possible sum of oxidations
    # (taking into account nsites for that particular element)
    el_amt = comp.get_el_amt_dict()
    els = el_amt.keys()
    el_sums = []  # matrix: dim1= el_idx, dim2=possible sums
    el_sum_scores = {}  # dict of el_idx -> {sum -> best score}
    for idx, el in enumerate(els):
        el_sum_scores[idx] = {}
        el_sums.append([])
        if oxi_states_override.get(el):
            oxids = oxi_states_override[el]
        elif all_oxi_states:
            oxids = Element(el).oxidation_states
        else:
            oxids = Element(el).icsd_oxidation_states or \
                Element(el).oxidation_states

        # get all possible combinations of oxidation states
        # and sum each combination
        for oxid_combo in combinations_with_replacement(oxids,
                                                        int(el_amt[el])):
            oxid_sum = sum(oxid_combo)
            # keep sums in first-seen order; this order drives the order of
            # the trial solutions below
            if oxid_sum not in el_sums[idx]:
                el_sums[idx].append(oxid_sum)

            # how probable is this combo?
            score = sum([Composition.oxi_prob.get(Specie(el, o), 0)
                         for o in oxid_combo])
            # Several combos can give the same sum; score every one of them
            # and remember the best, so the ranking is by the most probable
            # combination for each sum.
            el_sum_scores[idx][oxid_sum] = max(
                el_sum_scores[idx].get(oxid_sum, 0), score)

    all_sols = []  # will contain all solutions
    all_scores = []  # will contain a score for each solution
    for x in product(*el_sums):
        # each x is a trial of one possible oxidation sum for each element
        if sum(x) == target_charge:  # charge balance condition
            el_sum_sol = dict(zip(els, x))  # element->oxid_sum
            # normalize oxid_sum by amount to get avg oxid state
            sol = {el: v / el_amt[el] for el, v in el_sum_sol.items()}
            all_sols.append(sol)  # add the solution to the list of solutions

            # determine the score for this solution
            score = 0
            for idx, v in enumerate(x):
                score += el_sum_scores[idx][v]
            all_scores.append(score)

    # sort the solutions by highest to lowest score (stable for ties)
    all_sols = [x for (y, x) in sorted(zip(all_scores, all_sols),
                                       key=lambda pair: pair[0],
                                       reverse=True)]
    return all_sols
def setUp(self):
    """Create the iron species fixtures used by the test methods."""
    # Fe2+ parsed from its string form
    self.specie1 = Specie.from_string("Fe2+")
    # Fe3+ and Fe2+ built directly from symbol and oxidation state
    self.specie2, self.specie3 = Specie("Fe", 3), Specie("Fe", 2)
    # Fe2+ that additionally carries a spin property
    self.specie4 = Specie("Fe", 2, {"spin": 5})
"N", "P", "As", "Sb", "O", "S", "Se", "Te", "F", "Cl", "Br", "I"]) module_dir = os.path.dirname(os.path.abspath(__file__)) #Read in BV parameters. BV_PARAMS = {} with open(os.path.join(module_dir, "bvparam_1991.json"), "r") as f: for k, v in json.load(f).items(): BV_PARAMS[Element(k)] = v #Read in json containing data-mined ICSD BV data. with open(os.path.join(module_dir, "icsd_bv.json"), "r") as f: all_data = json.load(f) ICSD_BV_DATA = {Specie.from_string(sp): data for sp, data in all_data["bvsum"].items()} PRIOR_PROB = {Specie.from_string(sp): data for sp, data in all_data["occurrence"].items()} def calculate_bv_sum(site, nn_list, scale_factor=1): """ Calculates the BV sum of a site. Args: site: The site nn_list: List of nearest neighbors in the format [(nn_site, dist), ...]. anion_el:
def Analyze_Voronoi_Nodes(args):
    """
    A standard process to apply all filters. Zeo++ finds all possible
    polyhedrons and corresponding sites while this class will screen bad sites
    and merge them.
    The program currently supports CIF input files ONLY.

    Args:
        args.cif_file (str): Path of the CIF file.
        args.input_file (yaml): Path of the input file which specifies filter
            parameters. The input file must be a yaml:
            Mandatory:
                1. SPECIE: a string of target diffusion specie, with oxidation
                   state; e.g. Li+: Li specie with +1 oxidation state.
            Optional (each parameter must be added according to the filters
            specified):
                Overall:
                    2. ANION: a string of potential anion type in the
                       structure. This parameter will automatically specify
                       parameters for further analysis: BV_UP, BV_LW, R_CUT.
                       However, these parameters will be overwritten if
                       they're explicitly assigned. e.g. S (sulfur) will have
                       R_CUT: 2.5 A; if the input yaml file has another R_CUT
                       of 2 A, the final R_CUT will be 2 A.
                       Currently supported anions:
                                       | BV_LW | BV_UP | R_CUT(A) |
                           -----------------------------------------
                           S (sulfur)  |  0.4  |  1.1  |   2.5    |
                           O (oxygen)  |  0.5  |  1.2  |   2.3    |
                       Any other anion prints a warning and uses
                       BV range (0, 1.5) and R_CUT 2.0 A.
                VoroPerco:
                    3. PERCO_R: the percolation radius for diffusion specie;
                VoroBV:
                    4. BV_UP: the maximum bond valence of a site which is
                       considered to be appropriate;
                    5. BV_LW: the minimum bond valence of a site which is
                       considered to be appropriate;
                Coulomb:
                    6. R_CUT: the minimum distance between target specie and
                       nearest ion (either anion or cation);
                VoroLong:
                    7. LONG: the criteria to decide whether a node is a long
                       node or not. Unit: A;
                MergeSite:
                    8. NEIGHBOR: the distance criteria to decide whether 2
                       sites / nodes are too close to each other. Unit: A.
                    9. LONG
        args.filters (str): strings to specify which filters to use in the
            analysis, applied in the given order. Matching is
            case-insensitive. Currently supported filters:
                Ordered: OrderFrameworkFilter
                PropOxi: OxidationStateFilter
                VoroPerco: TAPercolateFilter
                Coulomb: TACoulombReplusionFilter
                VoroBV: TABvFilter
                VoroLong: TALongFilter
                MergeSite: OptimumSiteFilter
                VoroInfo: TALongFilter, but only output the center coordinates
                    and length of each node
            NOTE: the original structure copy and the framework used by the
            other filters are only produced by the Ordered filter, so it has
            to come first.

    Output:
        CIF files after applying each filter. The predicted sites for the
        target specie will be represented as sites with 50% partial occupancy.
        Note that some filters may be fundamental (decide whether they're good
        CIFs or not) and they may have no output structures.
        e.g. if applying OxidationStateFilter, TAPercolateFilter and
        TABvFilter, there will be 2 output CIF files:
            1. CIF with all accessible sites;
            2. CIF with all sites having good bond valence;
        OxidationStateFilter has no output structure.
        The process exits through sys.exit() as soon as a filter fails or a
        parameter it needs is missing.
    """
    import Topological_Analysis

    # built-in radius for different species
    va_dir = os.path.dirname(Topological_Analysis.__file__)
    radii_yaml_dir = os.path.join(va_dir, 'files/radii.yaml')
    with open(radii_yaml_dir, 'r') as radii_file:
        radii = yaml.load(radii_file)

    # read structure from CIF file
    name = args.cif_file[:-4]  # the last 4 characters are '.cif'
    precif = CifParser(args.cif_file, occupancy_tolerance=2.0)
    structure = precif.get_structures(primitive=False)[0].copy()

    # for input parameter file
    # NOTE(review): yaml.load on a user-supplied file can construct arbitrary
    # Python objects; consider yaml.safe_load if plain data is all that is
    # expected here.
    with open(args.input_file, 'r') as param_file:
        input_parameters = yaml.load(param_file)

    # target specie
    sp = Specie.from_string(input_parameters['SPECIE'])

    # Defaults implied by the anion type; both stay None without ANION.
    bv_range = None
    rc = None
    if 'ANION' in input_parameters:
        anion = input_parameters['ANION'].lower()
        if anion == 's':
            bv_range = (0.4, 1.1)
            rc = 2.5
        elif anion == 'o':
            bv_range = (0.5, 1.2)
            rc = 2.3
        else:
            print('## Unsupported anion type: {}'.format(
                input_parameters['ANION']))
            bv_range = (0, 1.5)
            rc = 2.0

    pr = input_parameters.get('PERCO_R')  # percolation radius

    if bv_range is not None:
        # explicit bond valence limits overwrite the anion defaults
        if 'BV_UP' in input_parameters:
            bv_range = (bv_range[0], input_parameters['BV_UP'])
        if 'BV_LW' in input_parameters:
            bv_range = (input_parameters['BV_LW'], bv_range[1])
    elif 'BV_UP' in input_parameters and 'BV_LW' in input_parameters:
        # there's no anion type, so both limits have to be given explicitly
        bv_range = (input_parameters['BV_LW'], input_parameters['BV_UP'])

    # cut-off distance of coulomb repulsion (explicit value wins)
    rc = input_parameters.get('R_CUT', rc)
    # cut-off distance to decide whether a node is long or not
    long_cutoff = input_parameters.get('LONG')
    # cut-off distance to decide whether 2 sites are neighbors
    nn = input_parameters.get('NEIGHBOR')

    # temporary parameters for filters applied
    frame_structure = None
    org_frame = None
    node_structure = None
    predicted_structure = None

    for f_index, filter_name in enumerate(args.filters):
        print('Step {}: {}'.format(f_index, filter_name))
        step = filter_name.lower()

        if step == 'ordered':
            # Check whether the framework is ordered or not.
            print('# Check framework disordering.')
            orderFrame = OrderFrameworkFilter(structure.copy(), radii, sp)
            org_structure = orderFrame.virtual_structure.copy()
            frame_structure = orderFrame.virtual_framework.copy()
            org_frame = orderFrame.framework.copy()
            print('# Check finishes.')

        elif step == 'propoxi':
            # Check oxidation states in structures. This is necessary for
            # bond valence filter.
            print('# Check oxidation states in structure.')
            PropOxi = OxidationStateFilter(org_structure.copy())
            if not PropOxi.decorated:
                print('## Oxidation state check fails...')
                sys.exit()
            else:
                print('# Check finishes.')

        elif step == 'voroperco':
            # Check whether there's enough space for percolation.
            print('# Check Voronoi percolation raduis.')
            if pr:
                VoroPerco = TAPercolateFilter(org_structure.copy(), radii,
                                              sp, pr)
            else:
                print('## No percolation radius provided...')
                sys.exit()

            if not VoroPerco.analysis_results:
                print('## Cannot percolate...')
                sys.exit()
            else:
                # The Voronoi analysis results include:
                #   Voronoi_accessed_node_structure: A structure with all
                #       nodes (with Voronoi radius added to the property of
                #       each node);
                #   Voronoi_structure: A structure containing nodes whose
                #       Voronoi radius is greater than a certain value;
                #   Framework: The framework structure with no target
                #       diffusion specie;
                #   free_sph_max_dia: Maximum spherical diameter in the
                #       structure;
                #   ......
                # To see other results, please use 'analysis_keys' attribute
                # of the class.
                results = deepcopy(VoroPerco.analysis_results)
                print('# Percolation diameter (A): {}'.format(
                    round(results['free_sph_max_dia'], 3)))
                output_structure = org_frame.copy()
                if results['Voronoi_accessed_node_structure']:
                    node_structure = results[
                        'Voronoi_accessed_node_structure'].copy()
                    for nodes in node_structure.copy():
                        output_structure.append(str(sp), nodes.coords,
                                                coords_are_cartesian=True)
                    CifWriter(output_structure).write_file(
                        '{}_all_accessed_node.cif'.format(name))
                    print('# Percolation check finishes.')
                else:
                    print('## Errors in Voronoi analysis structure...')

        elif step == 'coulomb':
            print('# Check Coulomb replusion effects.')
            if (not frame_structure) or (not node_structure):
                print('## No framework and node structure provided for '
                      'Coulomb Replusion analysis...')
                sys.exit()
            elif not rc:
                print('## No Coulomb replusion cut-off distance provided...')
                sys.exit()
            else:
                # nodes are pruned against ions of the same sign as the
                # diffusing specie
                if sp.oxi_state < 0:
                    ion = 'anion'
                else:
                    ion = 'cation'
                print('# Processing Coulomb replusion check.')
                print('# {} effect detected, minimum distance to {}s is {} A.'
                      .format(ion, ion, round(rc, 3)))
                CoulRep = TACoulombReplusionFilter(node_structure.copy(),
                                                   frame_structure.copy(),
                                                   prune=ion,
                                                   min_d_to_ion=rc)
                if CoulRep.final_structure:
                    node_structure = CoulRep.final_structure.copy()
                    output_structure = org_frame.copy()
                    for node in node_structure.copy():
                        output_structure.append(str(sp), node.coords,
                                                coords_are_cartesian=True)
                    CifWriter(output_structure).write_file(
                        '{}_coulomb_filtered.cif'.format(name))
                    print('# Coulomb replusion check finishes.')
                else:
                    print('## All available nodes will experience high '
                          'Coulomb replusion...')
                    print('## The structure is either unreasonable or the '
                          'replusion radius cut-off is too large...')
                    sys.exit()

        elif step == 'vorobv':
            print('# Check bond valence limits.')
            if (not frame_structure) or (not node_structure):
                print('## No framework and node structure provided for bond '
                      'valence analysis...')
                sys.exit()
            elif not bv_range:
                print('## No bond valence range provided...')
                sys.exit()
            else:
                print('# Processing bond valence check.')
                print('# Bond valence limitation: {} - {}'.format(
                    bv_range[0], bv_range[1]))
                VoroBv = TABvFilter(node_structure.copy(),
                                    frame_structure.copy(), bv_range)
                if VoroBv.final_structure:
                    node_structure = VoroBv.final_structure.copy()
                    output_structure = org_frame.copy()  # output cif structure
                    output_doc = {}  # output csv file
                    variables = [
                        'Cartesian_Coords', 'Voronoi_R', 'Bond_Valence'
                    ]
                    for column in variables:
                        output_doc[column] = []
                    for node in node_structure.copy():
                        output_structure.append(str(sp), node.coords,
                                                coords_are_cartesian=True)
                        tmp_coords = [round(n, 4) for n in node.coords]
                        output_doc['Cartesian_Coords'].append(tmp_coords)
                        output_doc['Voronoi_R'].append(
                            round(node.properties['voronoi_radius'], 3))
                        output_doc['Bond_Valence'].append(
                            round(node.properties['valence_state'], 2))
                    CifWriter(output_structure).write_file(
                        '{}_bond_valence_filtered.cif'.format(name))
                    df = pds.DataFrame(data=output_doc).sort_values(
                        by=['Voronoi_R'])
                    df = df.reindex(variables, axis=1)
                    df.to_csv('{}_bv_info.csv'.format(name))
                    print('# Bond valence check finishes.')
                else:
                    print('## All available nodes are excluded for bad bond '
                          'valences...')
                    print('## The structure is either unreasonable or the '
                          'bond valence range is bad...')
                    sys.exit()

        elif step == 'vorolong':
            print('# Check long nodes in structure.')
            if not node_structure:
                print('## No node structure provided for long Voronoi node '
                      'analysis...')
                sys.exit()
            elif not long_cutoff:
                print('## No length provided to decide Voronoi node length...')
                sys.exit()
            else:
                print('# Processing Voronoi length check.')
                print('# Voronoi length limitation: {} A'.format(
                    round(long_cutoff, 3)))
                VoroLong = TALongFilter(node_structure.copy(), long_cutoff,
                                        use_voro_radii=True)
                print('# Maximum node length detected: {} A'.format(
                    round(VoroLong.longest_node_length, 3)))
                output_doc = {}
                variables = ['Center_Coords', 'Node_Length']
                for column in variables:
                    output_doc[column] = []
                for cluster in VoroLong.clusters:
                    tmp_coords = [round(n, 4) for n in cluster[0]]
                    output_doc['Center_Coords'].append(tmp_coords)
                    output_doc['Node_Length'].append(round(cluster[1], 4))
                df = pds.DataFrame(data=output_doc).sort_values(
                    by=['Node_Length'])
                df = df.reindex(variables, axis=1)
                df.to_csv('{}_node_length_info.csv'.format(name))
                print('# Central node information written.')
                if VoroLong.has_long_node:
                    print('# Long node check finishes.')
                else:
                    print('## The structure has no long nodes or node length '
                          'restriction is bad...')
                    print('## Please check the node length CSV for more '
                          'information...')
                    sys.exit()

        elif step == 'voroinfo':
            print('# Output the center coordinates and length of each '
                  'node......')
            if not node_structure:
                print('## No node structure provided for Voronoi '
                      'information...')
                sys.exit()
            else:
                VoroLong = TALongFilter(node_structure.copy(), 0,
                                        use_voro_radii=True)
                print('# Maximum node length detected: {} A'.format(
                    round(VoroLong.longest_node_length, 3)))
                output_doc = {}
                variables = ['Center_Coords', 'Node_Length']
                for column in variables:
                    output_doc[column] = []
                for cluster in VoroLong.clusters:
                    tmp_coords = [round(n, 4) for n in cluster[0]]
                    output_doc['Center_Coords'].append(tmp_coords)
                    output_doc['Node_Length'].append(round(cluster[1], 4))
                df = pds.DataFrame(data=output_doc).sort_values(
                    by=['Node_Length'])
                df = df.reindex(variables, axis=1)
                df.to_csv('{}_node_length_info.csv'.format(name))
                print('# Voronoi node information written.')

        elif step == 'mergesite':
            # before we use TAOptimumSiteFilter, we need to have a list of
            # different clusters, thus must use TADenseNeighbor and
            # TALongFilter.
            if not node_structure:
                print('## No node structure provided for optimizing sites...')
                sys.exit()
            if (not nn) or (not long_cutoff):
                print('## No neighbor distance cut-off and long node cut-off '
                      'provided for site optimization...')
                sys.exit()
            voro_dense = TADenseNeighbor(node_structure.copy(),
                                         close_criteria=1,
                                         big_node_radius=0,
                                         radius_range=[0, 0],
                                         use_radii_ratio=True)
            voro_long = TALongFilter(node_structure.copy(), 0,
                                     use_voro_radii=True)
            cluster_list = voro_dense.clustering(node_structure.copy(), 1,
                                                 True, True)
            # split the clusters by length
            long_list = []
            short_list = []
            for cluster in cluster_list:
                cluster_length = voro_long.get_cluster_length(
                    cluster, use_voro_radii=True)
                if cluster_length >= long_cutoff:
                    long_list.append(cluster)
                else:
                    short_list.append(cluster)
            print('# Processing site optimization: nearest neighbor cut-off '
                  '{} A.'.format(round(nn, 3)))
            OpSite = TAOptimumSiteFilter(org_structure.copy(), nn, sp,
                                         sort_type='None',
                                         use_exp_ordered_site=False)
            # optimize all long clusters first, then all short clusters
            opt_long_list = []
            opt_short_list = []
            for cluster in long_list:
                opt_long_list.extend(
                    OpSite.optimize_cluster(cluster, nn, sort_type='radius'))
            for cluster in short_list:
                opt_short_list.extend(
                    OpSite.optimize_cluster(cluster, nn, sort_type='radius'))
            print('# Long node number: {}'.format(len(opt_long_list)))
            print('# Short node number: {}'.format(len(opt_short_list)))
            new_list = opt_long_list + opt_short_list
            OpSite.add_cluster(new_list)
            output_structure = OpSite.site_structure.copy()
            # it seems 50% occupancy sites are easier to see. You may
            # directly use output_structure otherwise
            half_list = []
            for site in output_structure:
                ppt = deepcopy(site.properties)
                half_site = PeriodicSite({str(sp): 0.5},
                                         site.coords,
                                         site.lattice,
                                         to_unit_cell=False,
                                         coords_are_cartesian=True,
                                         properties=ppt)
                half_list.append(half_site)
            half_structure = Structure.from_sites(half_list)
            CifWriter(half_structure).write_file(
                '{}_{}_optimized_sites.cif'.format(name, 'radius'))

            # for predicted structure: spread the original number of target
            # species evenly over the optimized sites
            tot_num = org_structure.composition[sp]
            current_num = OpSite.site_structure.composition.num_atoms
            ratio = tot_num / current_num
            if ratio > 1:
                print('## Prediction error, please be cautious about the '
                      'predicted results.')
                print('## Please also double check whether the input '
                      'parameters are reasonable...')
                ratio = 1  # occupancy cannot exceed 1
            prediction = org_frame.copy()
            for site in OpSite.site_structure.copy():
                prediction.append({str(sp): ratio}, site.coords,
                                  coords_are_cartesian=True)
            prediction.sort()
            predicted_structure = prediction.copy()
            print('# Site optimization finishes.')

        else:
            print('## Unsupported operation...')

    if predicted_structure:
        # predicted_structure is only set by the MergeSite filter, which also
        # defines half_structure used below.
        comp = org_structure.composition.reduced_formula
        CifWriter(predicted_structure).write_file('{}_{}_predicted.cif'.format(
            name, comp))
        cmds = cmd_by_radius(half_structure, 0.5)
        with open('{}_cmd'.format(name), 'w') as cmd_file:
            cmd_file.write('mol new\n')
            cmd_file.writelines(cmds)
def _get_oxid_state_guesses(self, all_oxi_states, max_sites,
                            oxi_states_override, target_charge):
    """
    Worker behind `oxi_state_guesses`: enumerate every charge-balanced
    assignment of oxidation states and rank the assignments by the summed
    prior probability of the species involved.

    Args:
        all_oxi_states (bool): selects the candidate oxidation states of an
            element that has no override. If True,
            Element.oxidation_states is used; otherwise
            Element.icsd_oxidation_states is used, falling back to
            Element.oxidation_states when that list is empty.
        max_sites (int): -1 reduces the composition fully. Any other truthy
            value smaller than the number of atoms switches to the reduced
            composition scaled up to stay at or below max_sites.
        oxi_states_override (dict): dict of str->list replacing the
            candidate oxidation states of an element,
            e.g. {"V": [2,3,4,5]}. May be None.
        target_charge (int): total charge the oxidation states must sum to.

    Returns:
        A pair (all_sols, all_oxid_combo), both ordered from the highest to
        the lowest score. all_sols holds one dict per solution mapping the
        element symbol to its average oxidation state. all_oxid_combo holds
        the matching dicts mapping the element symbol to the best-scoring
        combination of oxidation states for the sites of that element.
        Both are tuples when at least one solution exists and empty lists
        otherwise.

    Raises:
        ValueError: if the composition cannot be brought down to max_sites,
            or if it contains non-integer amounts.
    """
    comp = self.copy()

    # Shrink the composition when the caller asked for it.
    if max_sites == -1:
        comp = self.reduced_composition
    elif max_sites and comp.num_atoms > max_sites:
        reduced_comp, reduced_factor = \
            self.get_reduced_composition_and_factor()
        if reduced_factor > 1:
            # scale back up to get as close to max_sites as possible
            reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms))
            comp = reduced_comp
        if comp.num_atoms > max_sites:
            raise ValueError("Composition {} cannot accommodate max_sites "
                             "setting!".format(comp))

    # Priors are read once and cached on the class; they rank the solutions.
    if not Composition.oxi_prob:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        all_data = loadfn(os.path.join(module_dir, "..", "analysis",
                                       "icsd_bv.yaml"))
        priors = {}
        for label, prob in all_data["occurrence"].items():
            priors[Specie.from_string(label)] = prob
        Composition.oxi_prob = priors

    oxi_states_override = oxi_states_override or {}

    # Charge balancing only makes sense for whole numbers of sites.
    if not all(amt == int(amt) for amt in comp.values()):
        raise ValueError("Charge balance analysis requires integer "
                         "values in Composition!")

    el_amt = comp.get_el_amt_dict()
    els = el_amt.keys()

    # One entry per element, in the iteration order of `els`.
    el_sums = []        # reachable oxidation sums, in first-seen order
    el_sum_scores = []  # sum -> best score seen for that sum
    el_best_combo = []  # sum -> combination that achieved the best score
    for el in els:
        sums_here, scores_here, combos_here = [], {}, {}
        el_sums.append(sums_here)
        el_sum_scores.append(scores_here)
        el_best_combo.append(combos_here)

        override = oxi_states_override.get(el)
        if override:
            oxids = override
        else:
            element = Element(el)
            if all_oxi_states:
                oxids = element.oxidation_states
            else:
                oxids = element.icsd_oxidation_states or \
                    element.oxidation_states

        # Try every multiset of oxidation states over this element's sites.
        for oxid_combo in combinations_with_replacement(oxids,
                                                        int(el_amt[el])):
            oxid_sum = sum(oxid_combo)
            if oxid_sum not in sums_here:
                sums_here.append(oxid_sum)

            score = sum(Composition.oxi_prob.get(Specie(el, o), 0)
                        for o in oxid_combo)

            # Keep only the most probable combination for each sum.
            if oxid_sum not in scores_here or score > scores_here[oxid_sum]:
                scores_here[oxid_sum] = score
                combos_here[oxid_sum] = oxid_combo

    all_sols = []        # average oxidation state per element
    all_oxid_combo = []  # per-site oxidation states per element
    all_scores = []      # score of each solution
    for trial in product(*el_sums):
        # each trial picks one possible oxidation sum for every element
        if sum(trial) != target_charge:
            continue
        all_sols.append({el: v / el_amt[el] for el, v in zip(els, trial)})
        all_scores.append(sum(el_sum_scores[idx][v]
                              for idx, v in enumerate(trial)))
        all_oxid_combo.append({e: el_best_combo[idx][v]
                               for idx, (e, v) in enumerate(zip(els, trial))})

    # Most probable solutions first; ties keep their discovery order.
    if all_scores:
        ranked = sorted(zip(all_scores, all_sols, all_oxid_combo),
                        key=lambda entry: entry[0], reverse=True)
        all_sols = tuple(sol for _score, sol, _combo in ranked)
        all_oxid_combo = tuple(combo for _score, _sol, combo in ranked)
    return all_sols, all_oxid_combo
"H", "B", "C", "Si", "N", "P", "As", "Sb", "O", "S", "Se", "Te", "F", "Cl", "Br", "I" ] ] module_dir = os.path.dirname(os.path.abspath(__file__)) # Read in BV parameters. BV_PARAMS = {} for k, v in loadfn(os.path.join(module_dir, "bvparam_1991.yaml")).items(): BV_PARAMS[Element(k)] = v # Read in yaml containing data-mined ICSD BV data. all_data = loadfn(os.path.join(module_dir, "icsd_bv.yaml")) ICSD_BV_DATA = { Specie.from_string(sp): data for sp, data in all_data["bvsum"].items() } PRIOR_PROB = { Specie.from_string(sp): data for sp, data in all_data["occurrence"].items() } def calculate_bv_sum(site, nn_list, scale_factor=1.0): """ Calculates the BV sum of a site. Args: site (PeriodicSite): The central site to calculate the bond valence nn_list ([Neighbor]): A list of namedtuple Neighbors having "distance"
def Analyze_VASP_MD(args):
    """
    Analyze diffusivity from a series of vasprun.xml (or vasprun.xml.gz)
    files at one temperature, print a results table and, when requested,
    write the msd-vs-dt data to a CSV file.

    :param args: please check main function for details of args. The
        attributes read here are runs_start, runs_end, folder_feature,
        specie, step_skip, ncores, time_intervals_number,
        lower_bound_in_a_square, upper_bound, minimum_msd_diff_in_a_square,
        site_distance and msd_file. args.msd_file is overwritten when the
        MSD fit fails.
    :return: None
    :raises Exception: if a run folder holds neither vasprun.xml.gz nor
        vasprun.xml
    """
    vasprun_dirs = []
    for i in range(args.runs_start, args.runs_end + 1):
        run_folder = args.folder_feature + str(i)
        # The compressed file takes precedence over the plain one.
        for filename in ('vasprun.xml.gz', 'vasprun.xml'):
            candidate = os.path.join(run_folder, filename)
            if os.path.exists(candidate):
                vasprun_dirs.append(candidate)
                break
        else:
            raise Exception(
                "No vasprun.xml or vasprun.xml.gz in folder {}".format(
                    run_folder))

    # In analyzing Arrhenius relationship, it is required to provide charged
    # specie. To keep consistent, a charged specie is required here as well,
    # even though only its element is used.
    specie = Specie.from_string(args.specie)
    da = DiffusivityAnalyzer.from_files(
        vasprun_dirs,
        str(specie.element),
        step_skip=args.step_skip,
        ncores=args.ncores,
        time_intervals_number=args.time_intervals_number,
        spec_dict={
            # bounds given in units of site_distance^2 are converted here
            'lower_bound': (args.lower_bound_in_a_square
                            * args.site_distance
                            * args.site_distance),
            'upper_bound': args.upper_bound,
            'minimum_msd_diff': (args.minimum_msd_diff_in_a_square
                                 * args.site_distance
                                 * args.site_distance),
        })
    ea = ErrorAnalysisFromDiffusivityAnalyzer(
        da, site_distance=args.site_distance)

    if da.diffusivity > 0:  # The linear fitting succeed
        summary_info = ea.get_summary_dict(oxidized_specie=args.specie)
    else:
        # if the msd profile of the MD doesn't fulfill the fitting
        # requirements, da.diffusivity is set to be negative
        summary_info = {
            "diffusion result":
                "MSD calculated from MD doesn't fulfill the fitting requirement",
            "max msd": max(da.msd),
            'msd': da.msd,
            'dt': da.dt,
            'msd_component': da.msd_component
        }
        # Force the msd dump so the failed fit can be inspected.
        print("Output msd-dt into {}K_msd-dt.csv".format(int(da.temperature)))
        args.msd_file = "{}K_msd-dt.csv".format(int(da.temperature))

    # output
    print("=" * 40)
    print("Used vasprun.xml files")
    print("Start run: {}, end run: {}".format(vasprun_dirs[0],
                                              vasprun_dirs[-1]))
    print("=" * 40)

    # results table; the bulky array entries are left for the CSV file
    header_result = ("Parameter", "Value")
    result_table = PrettyTable(header_result)
    result_table.align["Parameter"] = "l"
    for k, v in summary_info.items():
        if k not in ['msd', 'dt', 'msd_component']:
            result_table.add_row([k, str(v)])
    result_table.add_row(['composition', str(da.structure.composition)])
    print("Results table: ")
    print("Diffusivity unit: cm^2/s, Conductivity unit: mS/cm")
    print(result_table.get_string(sortby='Parameter'))

    # whether output msd
    if args.msd_file:
        print("Output msd-dt into file: {}".format(args.msd_file))
        # newline='' lets the csv module control line endings itself, as
        # its documentation requires; without it rows end in \r\r\n on
        # Windows.
        with open(args.msd_file, 'w', newline='') as fp:
            w_csv = csv.writer(fp, delimiter=',')
            data = [
                summary_info['dt'],
                summary_info['msd'],
                summary_info['msd_component'][0],
                summary_info['msd_component'][1],
                summary_info['msd_component'][2]
            ]
            w_csv.writerow([
                "dt (fs)", "msd (A^2)", "msd_component_0", "msd_component_1",
                "msd_component_2"
            ])
            # transpose the columns into one row per time step
            w_csv.writerows(zip(*data))
def oxi_state_guesses(self, oxi_states_override=None, target_charge=0,
                      all_oxi_states=False, max_sites=None):
    """
    Return every charge-balanced combination of average oxidation states
    for this composition, most probable first. The composition must have
    integer amounts.

    More atoms in the composition give more degrees of freedom: if the
    possible oxidation states of element X are [2,4] and of Y are [-3],
    then XY is not charge balanced but X2Y2 is. Use max_sites to improve
    performance if needed.

    Args:
        oxi_states_override (dict): dict of str->list replacing the
            candidate oxidation states of an element,
            e.g. {"V": [2,3,4,5]}
        target_charge (int): total charge the oxidation states must sum
            to. Default is 0, signifying charge balance.
        all_oxi_states (bool): selects the candidate oxidation states of an
            element that has no override. If True,
            Element.oxidation_states is used; otherwise
            Element.icsd_oxidation_states is used, falling back to
            Element.oxidation_states when that list is empty.
        max_sites (int): -1 reduces the composition fully. Any other truthy
            value smaller than the number of atoms switches to the reduced
            composition scaled up to stay at or below max_sites.

    Returns:
        A list of dicts - each dict maps an element symbol to its average
        oxidation state across all sites in that composition. The list is
        ordered from the highest to the lowest score and is empty when no
        combination reaches target_charge.

    Raises:
        ValueError: if the composition cannot be brought down to max_sites,
            or if it contains non-integer amounts.
    """
    comp = self.copy()

    # Shrink the composition when the caller asked for it.
    if max_sites == -1:
        comp = self.reduced_composition
    elif max_sites and comp.num_atoms > max_sites:
        reduced_comp, reduced_factor = \
            self.get_reduced_composition_and_factor()
        if reduced_factor > 1:
            # scale back up to get as close to max_sites as possible
            reduced_comp *= max(1, int(max_sites / reduced_comp.num_atoms))
            comp = reduced_comp
        if comp.num_atoms > max_sites:
            raise ValueError("Composition {} cannot accommodate max_sites "
                             "setting!".format(comp))

    # Priors are read once and cached on the class; they rank the solutions.
    if not Composition.oxi_prob:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        all_data = loadfn(
            os.path.join(module_dir, "..", "analysis", "icsd_bv.yaml"))
        priors = {}
        for label, prob in all_data["occurrence"].items():
            priors[Specie.from_string(label)] = prob
        Composition.oxi_prob = priors

    oxi_states_override = oxi_states_override or {}

    # Charge balancing only makes sense for whole numbers of sites.
    if not all(amt == int(amt) for amt in comp.values()):
        raise ValueError("Charge balance analysis requires integer "
                         "values in Composition!")

    el_amt = comp.get_el_amt_dict()
    els = el_amt.keys()

    # One entry per element, in the iteration order of `els`.
    el_sums = []        # reachable oxidation sums, in first-seen order
    el_sum_scores = []  # sum -> best score seen for that sum
    for el in els:
        sums_here, scores_here = [], {}
        el_sums.append(sums_here)
        el_sum_scores.append(scores_here)

        override = oxi_states_override.get(el)
        if override:
            oxids = override
        else:
            element = Element(el)
            if all_oxi_states:
                oxids = element.oxidation_states
            else:
                oxids = element.icsd_oxidation_states or \
                    element.oxidation_states

        # Try every multiset of oxidation states over this element's sites.
        for oxid_combo in combinations_with_replacement(
                oxids, int(el_amt[el])):
            oxid_sum = sum(oxid_combo)
            if oxid_sum not in sums_here:
                sums_here.append(oxid_sum)
            # how probable is this combo?
            score = sum(Composition.oxi_prob.get(Specie(el, o), 0)
                        for o in oxid_combo)
            scores_here[oxid_sum] = max(scores_here.get(oxid_sum, 0), score)

    all_sols = []    # will contain all solutions
    all_scores = []  # will contain a score for each solution
    for trial in product(*el_sums):
        # each trial picks one possible oxidation sum for every element
        if sum(trial) != target_charge:
            continue
        # normalize each sum by the element amount to get the average state
        all_sols.append({el: v / el_amt[el] for el, v in zip(els, trial)})
        all_scores.append(sum(el_sum_scores[idx][v]
                              for idx, v in enumerate(trial)))

    # Most probable solutions first; ties keep their discovery order.
    ranked = sorted(zip(all_scores, all_sols),
                    key=lambda pair: pair[0], reverse=True)
    return [sol for _score, sol in ranked]
"N", "P", "As", "Sb", "O", "S", "Se", "Te", "F", "Cl", "Br", "I"]) module_dir = os.path.dirname(os.path.abspath(__file__)) #Read in BV parameters. BV_PARAMS = {} with open(os.path.join(module_dir, "bvparam_1991.json"), "r") as f: for k, v in json.load(f).items(): BV_PARAMS[Element(k)] = v #Read in json containing data-mined ICSD BV data. with open(os.path.join(module_dir, "icsd_bv.json"), "r") as f: all_data = json.load(f) ICSD_BV_DATA = {Specie.from_string(sp): data for sp, data in all_data["bvsum"].items()} PRIOR_PROB = {Specie.from_string(sp): data for sp, data in all_data["occurrence"].items()} def calculate_bv_sum(site, nn_list, scale_factor=1.0): """ Calculates the BV sum of a site. Args: site: The site nn_list: List of nearest neighbors in the format [(nn_site, dist), ...]. anion_el:
def _get_oxid_state_guesses(self, all_oxi_states, max_sites,
                            oxi_states_override, target_charge):
    """
    Utility operation for guessing oxidation states; see
    `oxi_state_guesses` for the public entry point. It enumerates the
    charge-balanced oxidation state assignments and orders them by the
    summed prior probability of the species they use.

    Args:
        oxi_states_override (dict): dict of str->list replacing the
            candidate oxidation states of an element,
            e.g. {"V": [2,3,4,5]}. May be None.
        target_charge (int): total charge the oxidation states must sum to.
        all_oxi_states (bool): selects the candidate oxidation states of an
            element that has no override. If True,
            Element.oxidation_states is used; otherwise
            Element.icsd_oxidation_states is used, falling back to
            Element.oxidation_states when that list is empty.
        max_sites (int): -1 reduces the composition fully. Any other truthy
            value smaller than the number of atoms switches to the reduced
            composition scaled up to stay at or below max_sites.

    Returns:
        A pair (all_sols, all_oxid_combo), both ordered from the highest to
        the lowest score. all_sols holds one dict per solution mapping the
        element symbol to its average oxidation state. all_oxid_combo holds
        the matching dicts mapping the element symbol to the best-scoring
        combination of oxidation states for the sites of that element.
        Both are tuples when at least one solution exists and empty lists
        otherwise.

    Raises:
        ValueError: if the composition cannot be brought down to max_sites,
            or if it contains non-integer amounts.
    """
    comp = self.copy()

    # reduce Composition if necessary
    if max_sites == -1:
        comp = self.reduced_composition
    elif max_sites and comp.num_atoms > max_sites:
        reduced_comp, reduced_factor = \
            self.get_reduced_composition_and_factor()
        if reduced_factor > 1:
            repeat = max(1, int(max_sites / reduced_comp.num_atoms))
            reduced_comp *= repeat
            comp = reduced_comp  # as close to max_sites as possible
        if comp.num_atoms > max_sites:
            raise ValueError("Composition {} cannot accommodate max_sites "
                             "setting!".format(comp))

    # Load prior probabilities of oxidation states (cached on the class),
    # used to rank solutions
    if not Composition.oxi_prob:
        here = os.path.dirname(os.path.abspath(__file__))
        yaml_path = os.path.join(here, "..", "analysis", "icsd_bv.yaml")
        occurrence = loadfn(yaml_path)["occurrence"]
        Composition.oxi_prob = {
            Specie.from_string(sp): data for sp, data in occurrence.items()
        }

    oxi_states_override = oxi_states_override or {}

    # Composition may only have integer amounts
    if not all(amt == int(amt) for amt in comp.values()):
        raise ValueError("Charge balance analysis requires integer "
                         "values in Composition!")

    # for each element, determine all possible sums of oxidation states
    # (taking into account the number of sites of that element)
    el_amt = comp.get_el_amt_dict()
    els = el_amt.keys()
    el_sums = []             # el_idx -> list of possible sums
    el_sum_scores = {}       # el_idx -> {sum: best score}
    el_best_oxid_combo = {}  # el_idx -> {sum: combo with the best score}
    for idx, el in enumerate(els):
        possible_sums = []
        best_score = el_sum_scores[idx] = {}
        best_combo = el_best_oxid_combo[idx] = {}
        el_sums.append(possible_sums)

        if oxi_states_override.get(el):
            oxids = oxi_states_override[el]
        elif all_oxi_states:
            oxids = Element(el).oxidation_states
        else:
            oxids = (Element(el).icsd_oxidation_states
                     or Element(el).oxidation_states)

        n_sites = int(el_amt[el])
        for oxid_combo in combinations_with_replacement(oxids, n_sites):
            # list this sum as a possible option
            oxid_sum = sum(oxid_combo)
            if oxid_sum not in possible_sums:
                possible_sums.append(oxid_sum)

            # how probable is this combination?
            score = sum(Composition.oxi_prob.get(Specie(el, o), 0)
                        for o in oxid_combo)

            # remember it when it is the most probable one for its sum
            is_new = oxid_sum not in best_score
            if is_new or score > best_score.get(oxid_sum, 0):
                best_score[oxid_sum] = score
                best_combo[oxid_sum] = oxid_combo

    # gather every charge-balanced pick of one sum per element
    all_sols = []        # will contain all solutions
    all_oxid_combo = []  # best per-site oxidation states for each solution
    all_scores = []      # will contain a score for each solution
    for x in product(*el_sums):
        if sum(x) != target_charge:  # charge balance condition
            continue

        # normalize each sum by the element amount to get the average state
        all_sols.append({el: v / el_amt[el] for el, v in zip(els, x)})

        total = 0
        combos = {}
        for idx, (el, v) in enumerate(zip(els, x)):
            total += el_sum_scores[idx][v]
            combos[el] = el_best_oxid_combo[idx][v]
        all_scores.append(total)
        all_oxid_combo.append(combos)

    if not all_scores:
        return all_sols, all_oxid_combo

    # sort the solutions from the highest to the lowest score; the sort is
    # stable, so equally scored solutions keep their discovery order
    order = sorted(range(len(all_scores)),
                   key=lambda pos: all_scores[pos], reverse=True)
    return (tuple(all_sols[pos] for pos in order),
            tuple(all_oxid_combo[pos] for pos in order))