def ternary_smact_combos(position1, position2, position3, threshold=8):
    """Enumerate three-species compositions as tuples of Pymatgen Species.

    One species is drawn from each of the three position lists (e.g. two
    cation lists and one anion list for ternary metal halides). A
    combination is kept only when ``neutral_ratios`` reports a match for its
    oxidation states and ``pauling_test`` accepts it; every ratio returned by
    ``neutral_ratios`` then yields one composition.

    Args:
        position1 (list of species): Species to try in the first position.
        position2 (list of species): Species to try in the second position.
        position3 (list of species): Species to try in the third position.
        threshold (int): Max stoichiometry threshold, forwarded to
            ``neutral_ratios``.

    Returns:
        species_comps (list): Unique compositions, each a tuple of Pymatgen
            Species objects with one entry per atom in the formula unit.
    """
    candidates = []
    for trio in tqdm(itertools.product(position1, position2, position3)):
        symbols = [member.symbol for member in trio]
        ox_states = [int(member.oxi_state) for member in trio]
        enegs = [Element(symbol).pauling_eneg for symbol in symbols]

        cn_e, cn_r = neutral_ratios(ox_states, threshold=threshold)
        if not cn_e:
            continue
        if not pauling_test(ox_states, enegs, symbols=symbols,
                            repeat_cations=False):
            continue
        for ratio in cn_r:
            candidates.append((symbols, ox_states, list(ratio)))

    print('Number of compositions before reduction: {}'.format(
        len(candidates)))

    # Expand each (symbols, oxidation states, ratios) record into a flat
    # list holding one Species object per atom.
    print('Converting to Pymatgen Species...')
    species_comps = []
    for symbols, ox_states, ratios in tqdm(candidates):
        amounts = {}
        for sym, ox, ratio in zip(symbols, ox_states, ratios):
            amounts[Specie(sym, ox)] = ratio
        expanded = []
        for specie, amount in amounts.items():
            expanded.extend([specie] * amount)
        species_comps.append(expanded)

    # Put every composition into a canonical order so duplicates collapse.
    print(
        'Ditching duplicates (sorry to have got your hopes up with the big numbers)...'
    )
    for members in species_comps:
        members.sort()
        members.sort(key=lambda sp: sp.oxi_state, reverse=True)
    species_comps = list({tuple(members) for members in species_comps})

    print('Total number of new compounds unique compositions: {0}'.format(
        len(species_comps)))
    return species_comps
def ml_rep_generator(composition, stoichs=None):
    """Return a normalised, fixed-length vector describing a composition.

    Each position in the vector corresponds to an atomic number (index
    ``Z - 1``) and holds that element's fraction of the total stoichiometry,
    which makes the representation convenient for machine learning.

    e.g. Li2O --> [0, 0, 2/3, 0, 0, 0, 0, 1/3, 0 ....]

    Inspired by the representation used by Legrain et al.
    DOI: 10.1021/acs.chemmater.7b00789

    NOTE(review): the vector has 102 entries (atomic numbers 1-102). Earlier
    documentation quoted 103; the length is deliberately left unchanged here
    so existing consumers keep the same feature count, but an element with
    atomic number 103 will raise ``IndexError``. Confirm the intended length.

    Args:
        composition (list): Element objects in composition OR symbols of
            elements in composition. The type of the first entry decides how
            the whole list is interpreted.
        stoichs (list): Corresponding stoichiometries in the composition.
            Defaults to 1 for every entry.

    Returns:
        norm (list): List of floats representing the composition that sum
            to one.

    Raises:
        IndexError: If ``composition`` is empty, or an atomic number falls
            outside the vector.
        ZeroDivisionError: If the stoichiometries sum to zero.
    """
    # ``is None`` rather than ``== None``: the latter is evaluated
    # element-wise for array-like stoichiometries and cannot be used in ``if``.
    if stoichs is None:
        stoichs = [1] * len(composition)

    ML_rep = [0] * 102
    if isinstance(composition[0], Element):
        for element, stoich in zip(composition, stoichs):
            ML_rep[int(element.number) - 1] += stoich
    else:
        for element, stoich in zip(composition, stoichs):
            ML_rep[int(Element(element).number) - 1] += stoich

    # Compute the normalisation constant once instead of once per entry.
    total = sum(ML_rep)
    norm = [float(amount) / total for amount in ML_rep]
    return norm
def find_instances(anion, structures):
    """Count species in structures where ``anion`` is the most electronegative.

    A structure qualifies when its composition contains ``anion`` and no
    species in it has a Pauling electronegativity greater than the anion's.
    Each qualifying structure dictionary is tagged in place with
    ``'most_eneg_anion': anion``.

    Args:
        anion (Pymatgen Species): Anion of interest.
        structures (list): Dictionaries containing pymatgen Structures under
            the key ``'structure'``.

    Returns:
        an_containing (dict): Number of qualifying structures that feature
            each species, excluding the anion itself. Empty when no
            structure qualifies.
    """
    an_eneg = None  # looked up once, on the first structure that needs it
    species_seen = []
    for entry in structures:
        composition = entry['structure'].composition
        if anion not in composition:
            continue
        if an_eneg is None:
            an_eneg = Element(anion.symbol).pauling_eneg
        # Check whether anion is the most electronegative element
        if all(Element(sp.symbol).pauling_eneg <= an_eneg
               for sp in composition):
            entry['most_eneg_anion'] = anion
            # list.extend iterates the composition, giving one entry per
            # species per structure.
            species_seen.extend(composition)

    an_containing = dict(Counter(species_seen))
    # The anion is absent from the counts when nothing qualified, so a bare
    # pop(anion) would raise KeyError in that case.
    an_containing.pop(anion, None)
    return an_containing
def plot_metal(metal, list_scores, spec_list, show_legend=False):
    """Plot distribution of species for individual metals.

    Draws one group of bars per anion showing the score of each oxidation
    state of ``metal``, saves the figure as ``OxidationState_score_<metal>``
    at 300 dpi and then shows it.

    Args:
        metal (str): metal element of interest to plot
        list_scores (dict): Lists of scores for the species in spec_list
            keyed by anion. At most eight anions are supported because the
            labels and colours below are fixed lists of eight; the labels are
            applied in iteration order of this dict.
        spec_list (list): Pymatgen species in same order as corresponding
            scores in each list within the dict list_scores
        show_legend (bool): display legend on plot
    """
    # Deliberately inverted starting range; narrowed to the data below.
    min_x, max_x = 20, -20

    overall_list = []
    for anion, anion_scores in list_scores.items():
        an_dict = {}
        for spec, score in zip(spec_list, anion_scores):
            if spec.symbol == metal:
                an_dict[spec.oxi_state] = score
        if an_dict:
            x, y = zip(*sorted(an_dict.items()))
            x = list(x)
            y = list(y)
            overall_list.append([x, y, anion])
            min_x = min(min_x, min(x))
            max_x = max(max_x, max(x))
        else:
            # Placeholder zero-height bar so this anion keeps its slot.
            overall_list.append([[1], [0], anion])

    # Plotting
    labels = [
        "$F^-$", "$O^{2-}$", "$Cl^-$", "$Br^-$", "$I^-$", "$S^{2-}$",
        "$Se^{2-}$", "$Te^{2-}$"
    ]

    # Aesthetics and bar positions
    width = 1. / 10.
    pos = 0
    colours = [
        '#E51200', '#DF5400', '#DA9300', '#D4CE00', '#97CF00', '#57C900',
        '#1BC400', '#00BF1D'
    ]

    for col, anion in enumerate(overall_list):
        pos += width
        plt.bar(np.array(anion[0]) + pos,
                anion[1],
                width,
                label=labels[col],
                color=colours[col])

    if show_legend:
        plt.legend(prop={'size': 24})

    # Ticks are laid out before min_x is clamped for the axis limits.
    plt.xticks(
        np.arange(min_x, max_x + 1) + 0.5,
        np.arange(min_x, max_x + 1, dtype=np.int_))
    if min_x < 0:
        min_x = 0
    plt.xlim(min_x, max_x + 1)
    plt.ylim(0, 1.19)

    plt.xlabel('Oxidation state')
    plt.ylabel('Species fraction')

    # Periodic-table style annotation: atomic number, symbol, mass.
    at_no = int(Element(metal).number)
    mass = Element(metal).mass
    plt.text(np.mean([max_x, min_x]) + 0.5,
             1.0,
             "$^{{{0}}}$\n {1}\n$_{{{2:.2f}}}$".format(at_no, metal, mass),
             bbox={
                 'facecolor': 'gray',
                 'alpha': 0.3,
                 'pad': 20
             },
             size=28)
    plt.tight_layout()
    # dpi belongs to savefig; it was previously passed to str.format, where
    # it was silently ignored.
    plt.savefig('OxidationState_score_{0}'.format(metal), dpi=300)
    plt.show()
def species_totals(structures,
                   count_elements=False,
                   anions=None,
                   edit_structures_dicts=True,
                   return_species_list=False):
    """Count the number of compounds that feature each Species.

    Given a set of pymatgen structures in the form of dictionaries where the
    Structure is keyed as 'structure', returns the number of compounds that
    features each Species (or element), optionally separated by the anion
    that is the most electronegative species in the compound.

    Args:
        structures (list): dictionaries containing pymatgen Structures.
        count_elements (bool): switch to counting elements not species.
            Only used when no anions are supplied.
        anions (list): Pymatgen.Species anions of interest. When empty or
            None, all structures are counted together.
        edit_structures_dicts (bool): Modify the dicts in the structures
            list to add a 'most_eneg_anion' key.
        return_species_list (bool): Also return the species list produced by
            get_unique_species.

    Returns:
        totals (dict): Totals of each species in structure list when no
            anions are given; otherwise a dict keyed by anion whose values
            are the totals of each species for that anion (the anion itself
            excluded, and empty when no structure qualifies).
        species_list (optional): List of species for structures as generated
            by get_unique_species. Returned as the second item of a tuple
            when return_species_list is True.
    """
    # Simple method if simply counting all species or elements
    if not anions:
        tally = Counter()
        for entry in structures:
            for sp in entry['structure'].composition:
                tally[sp.symbol if count_elements else sp] += 1
        totals = dict(tally)

    # Method used if collecting count per anion
    else:
        totals = {}
        for anion in tqdm(anions):
            an_eneg = None  # looked up once per anion, on first use
            tally = Counter()
            for entry in structures:
                composition = entry['structure'].composition
                if anion not in composition:
                    continue
                if an_eneg is None:
                    an_eneg = Element(anion.symbol).pauling_eneg
                # Check whether anion is most electronegative element
                if not all(Element(sp.symbol).pauling_eneg <= an_eneg
                           for sp in composition):
                    continue
                # Increment explicitly: Counter.update would read amounts
                # from a mapping instead of counting its keys.
                for sp in composition:
                    tally[sp] += 1
                if edit_structures_dicts:
                    entry['most_eneg_anion'] = anion
            # The anion is missing from the tally when no structure
            # qualified, so a bare pop(anion) would raise KeyError.
            tally.pop(anion, None)
            totals[anion] = dict(tally)

    # Return objects based on whether species list required
    if return_species_list:
        return (totals, get_unique_species(structures))
    return totals
def assign_prob(structures,
                scoring='overall_score',
                verbose=False,
                edit_struc_dict=True,
                list_scores=None,
                species_list=None):
    """Assign probability values to structures based on species scores.

    For each structure the most electronegative anion is taken from the
    'most_eneg_anion' key if present. Otherwise it is worked out from the
    Pauling electronegativities of the species present and stored under that
    key (this happens regardless of edit_struc_dict). The scores of the
    structure's species for that anion are then combined according to
    ``scoring``.

    Args:
        structures (list): Pymatgen Structures, keyed under 'structure'.
        scoring (str): Can be either:
            overall_score - Mean species-anion score for each species of
                interest in the composition.
            limiting_score - As above but minimum species-anion score.
            probability - Product of scores.
            probability_simple - Product of scores for different species
                only (set(comp)).
            NOTE(review): the species of a structure are de-duplicated before
            scoring, so 'probability' and 'probability_simple' are computed
            over the same set of species.
        verbose (bool): Explicitly print any compounds that were skipped
            over due to the elements they contain.
        edit_struc_dict (bool): Add the probability to the dicts in the
            structures list.
        list_scores (dict): Lists of scores for the species in spec_list
            keyed by anion (as produced by generate_scores). Default values
            used from Faraday Discussions paper (DOI: 10.1039/C8FD00032H) if
            none supplied.
        species_list (list): Pymatgen species in same order as corresponding
            lists in list_scores. Default values used from Faraday
            Discussions paper (DOI: 10.1039/C8FD00032H) if none supplied.

    Returns:
        probabilities_list (list): Score for each structure in structures.
            Structures that could not be scored get 0.

    Raises:
        ValueError: If ``scoring`` is not one of the supported options.
    """
    valid_scoring = ('overall_score', 'limiting_score', 'probability',
                     'probability_simple')
    # Reject unknown modes up front; previously this surfaced as an
    # UnboundLocalError on the first structure.
    if scoring not in valid_scoring:
        raise ValueError(
            "scoring must be one of {0}, got {1!r}".format(
                valid_scoring, scoring))

    # NOTE(review): the default tables below are parsed with eval(). That is
    # only acceptable while the files are trusted package data; consider
    # ast.literal_eval if their contents are plain literals.
    if not list_scores:
        # Import default list_scores from data directory
        with open(
                os.path.join(
                    data_directory,
                    'oxidation_states/oxidationstates_prob_table.csv'),
                'r') as f:
            reader = csv.reader(f)
            list_scores = {eval(rows[0]): eval(rows[1]) for rows in reader}
        print('INFO: Using default list_scores.')

    if not species_list:
        # Import species_list from data directory
        with open(
                os.path.join(data_directory,
                             'oxidation_states/species_list.txt'),
                'r') as f:
            species_list = eval(f.readline())
        print('INFO: Using default species_list.')

    # anion -> {species: score}
    scores_dict = {
        key: dict(zip(species_list, values))
        for key, values in list_scores.items()
    }
    # Loop invariants: anion symbols with data, and a set for O(1) membership.
    an_symbols = [an[0] for an in list_scores]
    known_species = set(species_list)

    probabilities_list = []
    for struc in structures:
        comp = [(sp.symbol, sp.oxi_state)
                for sp in set(struc['structure'].species)]

        if 'most_eneg_anion' not in struc:
            # Get most eneg element in struc
            els = [Element(e[0]) for e in comp]
            els.sort(key=lambda x: x.pauling_eneg, reverse=True)
            most_eneg = els[0].symbol
            if most_eneg in an_symbols:
                for poss in list_scores:
                    if poss[0] == most_eneg:
                        struc['most_eneg_anion'] = poss
            else:
                print(
                    'No data available for most electronegative anion in structure.'
                )

        try:
            scores = [scores_dict[struc['most_eneg_anion']][sp]
                      for sp in comp if sp in known_species]
            if scoring == 'overall_score':
                overall_score = np.mean(scores)
            elif scoring == 'limiting_score':
                overall_score = min(scores)
            elif scoring == 'probability':
                overall_score = np.prod(scores)
            else:  # 'probability_simple'
                scores = [scores_dict[struc['most_eneg_anion']][sp]
                          for sp in list(set(comp)) if sp in known_species]
                overall_score = np.prod(scores)
        except Exception:
            # Best effort: a structure that cannot be scored (e.g. no anion
            # data, unknown species) gets 0. KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            if verbose:
                print('Could not get score for: {}'.format(comp))
            overall_score = 0

        if edit_struc_dict:
            struc['probability'] = overall_score
        probabilities_list.append(overall_score)

    return probabilities_list