def ternary_smact_combos(position1, position2, position3, threshold=8):
    """ Enumerate unique three-species compositions drawn from three species lists.

    Every (position1, position2, position3) triple is passed through
    ``neutral_ratios`` and ``pauling_test``; each stoichiometric ratio of a
    triple that passes both is expanded into a tuple of Pymatgen Species
    (each species repeated according to its ratio), and duplicates are
    removed.  Progress messages are printed along the way.
    Args:
        position(n) (list of species): Species to be considered iteratively for each
                                     position.
        threshold (int): Max stoichiometry threshold, forwarded to
                         ``neutral_ratios``.
    Returns:
        species_comps (list): Compositions as tuples of Pymatgen Species objects.
        """

    candidates = []
    triples = itertools.product(position1, position2, position3)
    for first, second, anion in tqdm(triples):
        symbols = []
        ox_states = []
        enegs = []
        # Gather symbol, integer oxidation state and Pauling
        # electronegativity for each of the three positions in turn.
        for species in (first, second, anion):
            symbol, ox_state = species.symbol, int(species.oxi_state)
            enegs.append(Element(symbol).pauling_eneg)
            symbols.append(symbol)
            ox_states.append(ox_state)

        cn_e, cn_r = neutral_ratios(ox_states, threshold=threshold)
        if not cn_e:
            continue

        passes_pauling = pauling_test(ox_states,
                                      enegs,
                                      symbols=symbols,
                                      repeat_cations=False)
        if not passes_pauling:
            continue

        for ratio in cn_r:
            candidates.append((symbols, ox_states, list(ratio)))

    print('Number of compositions before reduction:  {}'.format(
        len(candidates)))

    # Expand each candidate into a flat list of pymatgen species
    print('Converting to Pymatgen Species...')
    species_comps = []
    for symbols, ox_states, ratios in tqdm(candidates):
        amounts = {}
        for sym, ox, ratio in zip(symbols, ox_states, ratios):
            amounts[Specie(sym, ox)] = ratio
        expanded = []
        for specie, amount in amounts.items():
            expanded.extend([specie] * amount)
        species_comps.append(expanded)

    # Put every composition in a canonical order, then drop duplicates
    print(
        'Ditching duplicates (sorry to have got your hopes up with the big numbers)...'
    )
    for members in species_comps:
        members.sort()
        members.sort(key=lambda x: x.oxi_state, reverse=True)
    species_comps = list({tuple(members) for members in species_comps})
    print('Total number of new compounds unique compositions: {0}'.format(
        len(species_comps)))
    return species_comps
Example #2
0
def ml_rep_generator(composition, stoichs=None):
    """Function to take a composition of Elements and returns a
    list of values between 0 and 1 that describes the composition,
    useful for machine learning.

    The returned list has 102 entries; the entry at index ``Z - 1`` holds
    the fraction of the composition made up by the element with atomic
    number ``Z``.

    e.g. Li2O --> [0, 0, 2/3, 0, 0, 0, 0, 1/3, 0 ....]

    Inspired by the representation used by Legrain et al. DOI: 10.1021/acs.chemmater.7b00789

    NOTE(review): earlier documentation described the list as having length
    103, but the implementation has always produced 102 entries.  The length
    is kept at 102 so existing consumers see the same shape; confirm whether
    element 103 was meant to be supported.

    Args:
        composition (list): Element objects in composition OR symbols of elements in composition.
            Each entry is handled individually: strings are looked up with
            ``Element``, anything else is expected to expose a ``number``
            attribute (its atomic number).
        stoichs (list): Corresponding stoichiometries in the composition.
            Defaults to 1 for every entry.

    Returns:
        norm (list): List of floats representing the composition that sum
            to one

    Raises:
        IndexError: If an atomic number is greater than 102.
        ZeroDivisionError: If the stoichiometries sum to zero (this includes
            an empty composition).

    """
    if stoichs is None:
        stoichs = [1 for _ in composition]

    ML_rep = [0] * 102
    for element, stoich in zip(composition, stoichs):
        # Symbols need a lookup; element-like objects already carry their
        # atomic number.
        if isinstance(element, str):
            number = Element(element).number
        else:
            number = element.number
        ML_rep[int(number) - 1] += stoich

    # Compute the normalisation constant once rather than once per entry.
    total = sum(ML_rep)
    norm = [float(count) / total for count in ML_rep]
    return norm
Example #3
0
def find_instances(anion, structures):
    """Finds the number of instances of each species in a list of pymatgen
    Structure objects when a given anion is the most electronegative one
    present. Also adds most electronegative anion to the dictionary.

    A structure qualifies when its composition contains ``anion`` and no
    species in it has a Pauling electronegativity above that of the anion.
    Each qualifying dictionary gets a ``'most_eneg_anion'`` key set to
    ``anion`` (the input dictionaries are modified in place).
    Args:
        anion (Pymatgen Species): Anion of interest
        structures (list): Dictionaries containing pymatgen Structures
            under the key ``'structure'``.
    Returns:
        an_containing (dict): Number of qualifying structures that contain
            each species, excluding the anion itself.  Empty when no
            structure qualifies.
    """
    anion_eneg = None
    matched_species = []
    for entry in structures:
        composition = entry['structure'].composition
        if anion not in composition:
            continue
        # Look the anion's electronegativity up lazily, and only once.
        if anion_eneg is None:
            anion_eneg = Element(anion.symbol).pauling_eneg
        # Check whether anion most electronegative element
        if all(
                Element(sp.symbol).pauling_eneg <= anion_eneg
                for sp in composition):
            entry['most_eneg_anion'] = anion
            matched_species.extend(composition)

    an_containing = dict(Counter(matched_species))
    # The anion is absent from the counts when nothing qualified, so a
    # default is needed to avoid a KeyError.
    an_containing.pop(anion, None)
    return an_containing
Example #4
0
def plot_metal(metal, list_scores, spec_list, show_legend=False):
    """ Plot distribution of species for individual metals.

    Draws one group of bars per anion in ``list_scores`` showing the score
    of each oxidation state of ``metal``, annotates the plot with the
    metal's atomic number, symbol and mass, saves the figure as
    ``OxidationState_score_<metal>`` at 300 dpi and then shows it.
    Args:
        metal (str): metal element of interest to plot
        list_scores (dict): Lists of scores for the species in spec_list
        keyed by anion.
        spec_list (list): Pymatgen species in same order as corresponding
        scores in each list within the dict list_scores
        show_legend (bool): display legend on plot

    NOTE(review): legend labels and colours are assigned by position from
    fixed eight-entry lists (F, O, Cl, Br, I, S, Se, Te), not derived from
    the keys of ``list_scores``.  This presumably expects ``list_scores`` to
    hold those anions in that order; more than eight keys raises IndexError.
     """
    # Start from a deliberately inverted x range so real data replaces it
    min_x, max_x = 20, -20

    overall_list = []
    for anion in list_scores:
        an_dict = {
            spec.oxi_state: score
            for spec, score in zip(spec_list, list_scores[anion])
            if spec.symbol == metal
        }
        if an_dict:
            x, y = (list(values) for values in zip(*sorted(an_dict.items())))
            overall_list.append([x, y, anion])
            min_x = min(min_x, min(x))
            max_x = max(max_x, max(x))
        else:
            # Placeholder bar of zero height keeps one entry per anion so
            # label and colour indices stay aligned.
            overall_list.append([[1], [0], anion])

    # Plotting
    labels = [
        "$F^-$", "$O^{2-}$", "$Cl^-$", "$Br^-$", "$I^-$", "$S^{2-}$",
        "$Se^{2-}$", "$Te^{2-}$"
    ]

    # Aesthetics and bar positions
    width = 1. / 10.
    pos = 0
    colours = [
        '#E51200', '#DF5400', '#DA9300', '#D4CE00', '#97CF00', '#57C900',
        '#1BC400', '#00BF1D'
    ]

    for col, (ox_states, scores, _anion) in enumerate(overall_list):
        # Shift each anion's bars by one bar width so they sit side by side
        pos += width
        plt.bar(np.array(ox_states) + pos,
                scores,
                width,
                label=labels[col],
                color=colours[col])

    if show_legend:
        plt.legend(prop={'size': 24})

    # Ticks use the unclamped range; the visible range below starts at >= 0
    plt.xticks(
        np.arange(min_x, max_x + 1) + 0.5,
        np.arange(min_x, max_x + 1, dtype=np.int_))
    if min_x < 0:
        min_x = 0
    plt.xlim(min_x, max_x + 1)
    plt.ylim(0, 1.19)
    plt.xlabel('Oxidation state')
    plt.ylabel('Species fraction')

    element = Element(metal)
    at_no = int(element.number)
    mass = element.mass
    plt.text(np.mean([max_x, min_x]) + 0.5,
             1.0,
             "$^{{{0}}}$\n  {1}\n$_{{{2:.2f}}}$".format(at_no, metal, mass),
             bbox={
                 'facecolor': 'gray',
                 'alpha': 0.3,
                 'pad': 20
             },
             size=28)
    plt.tight_layout()
    # dpi must be given to savefig; passing it to str.format silently
    # discarded it.
    plt.savefig('OxidationState_score_{0}'.format(metal), dpi=300)
    plt.show()
def species_totals(structures,
                   count_elements=False,
                   anions=None,
                   edit_structures_dicts=True,
                   return_species_list=False):
    """Given a set of pymatgen structures in the form of dictionaries where
    the Structure is keyed as 'structure', returns the number
    of compounds that features each Species.

    When ``anions`` is given, counts are collected separately for each
    anion, using only the structures that contain the anion and in which no
    species has a higher Pauling electronegativity than it.
    Args:
        structures (list): dictionaries containing pymatgen Structures.
        count_elements (bool): switch to counting elements not species.
            Only used when no anions are supplied.
        anions (list): Pymatgen.Species anions of interest. If None or
            empty, all species (or elements) are simply counted.
        edit_structures_dicts (bool): Modify the dicts in the structures list
        to add a 'most_eneg_anion' key.
        return_species_list (bool): Also return the result of
        get_unique_species(structures).
    Returns:
        totals (dict): Totals of each species in structure list.
        or totals (dict): Totals of each species separated by anion, i.e.
        one dict of counts per anion (the anion itself is excluded; the
        dict is empty if no structure qualified for that anion).
        species_list (optional): List of species for structures as generated by
        get_unique_species.
    """
    # Simple method if simply counting all species or elements
    if not anions:
        if count_elements:
            totals = dict(
                Counter(sp.symbol for entry in structures
                        for sp in entry['structure'].composition))
        else:
            totals = dict(
                Counter(sp for entry in structures
                        for sp in entry['structure'].composition))
    # Method used if collecting count per anion
    else:
        totals = {}
        for anion in tqdm(anions):
            # Same for every structure, so look it up once per anion
            an_eneg = Element(anion.symbol).pauling_eneg
            matched_species = []
            for entry in structures:
                composition = entry['structure'].composition
                if anion not in composition:
                    continue
                # Check whether anion is most electronegative element
                if all(
                        Element(sp.symbol).pauling_eneg <= an_eneg
                        for sp in composition):
                    matched_species.extend(composition)
                    if edit_structures_dicts:
                        entry['most_eneg_anion'] = anion

            an_containing = dict(Counter(matched_species))
            # The anion is missing from the counts when no structure
            # qualified; a default avoids a KeyError in that case.
            an_containing.pop(anion, None)
            totals[anion] = an_containing

    # Return objects based on whether species list required
    if return_species_list:
        return (totals, get_unique_species(structures))
    return totals
Example #6
0
def assign_prob(structures,
                scoring='overall_score',
                verbose=False,
                edit_struc_dict=True,
                list_scores=None,
                species_list=None):
    """ Assigns probability values to structures based on the list of score values.

    For each structure the unique (symbol, oxidation state) pairs are
    scored against the structure's most electronegative anion and the
    scores are combined according to ``scoring``.  If a structure dict has
    no 'most_eneg_anion' key, one is worked out from the electronegativities
    and stored in the dict (this happens regardless of edit_struc_dict).
    Args:
        structures (list): Pymatgen Structures, keyed under 'structure'.
        list_scores (dict): Lists of scores for the species in spec_list keyed by anion
        (as produced by generate_scores). Default values used from Faraday Discussions paper
        (DOI: 10.1039/C8FD00032H) if none supplied.
        species_list (list): Pymatgen species in same order as corresponding lists in list_scores.
        Default values used from Faraday Discussions paper (DOI: 10.1039/C8FD00032H) if none supplied.
        scoring (str): Can be either:
                        overall_score - Mean species-anion score for each species of interest
                        in the composition.
                        limiting_score - As above but minimum species-anion score.
                        probability - Product of scores.
                        probability_simple - Product of scores for different species only (set(comp))
                        Any other value gives every structure a score of 0.
        verbose (bool): Explicitly print any compounds that were skipped over due to the elements
        they contain.
        edit_struc_dict (bool): Add the probability to the dicts in the structures list.
    Returns:
        probabilities_list (list): Score for each structure in structures.
        A structure that cannot be scored gets 0.

    NOTE(review): the composition is reduced to its unique species before
    scoring, so 'probability' and 'probability_simple' currently give the
    same result.
    """
    if not list_scores:
        # Import default list_scores from data directory
        with open(
                os.path.join(
                    data_directory,
                    'oxidation_states/oxidationstates_prob_table.csv'),
                'r') as f:
            reader = csv.reader(f)
            # SECURITY: eval() executes whatever the file contains.  This is
            # only acceptable while the file is trusted package data;
            # consider ast.literal_eval if the contents allow it.
            list_scores = {eval(rows[0]): eval(rows[1]) for rows in reader}
            print('INFO: Using default list_scores.')

    if not species_list:
        # Import species_list from data directory
        with open(
                os.path.join(data_directory,
                             'oxidation_states/species_list.txt'), 'r') as f:
            # SECURITY: same caveat as above for eval() on file contents.
            species_list = eval(f.readline())
            print('INFO: Using default species_list.')

    # scores_dict[anion][species] -> score
    scores_dict = {
        anion: dict(zip(species_list, anion_scores))
        for anion, anion_scores in list_scores.items()
    }
    known_species = set(species_list)
    # Loop invariants: anion symbols and the reducer for each scoring mode
    an_symbols = [an[0] for an in list_scores]
    reducers = {
        'overall_score': np.mean,
        'limiting_score': min,
        'probability': np.prod,
        'probability_simple': np.prod,
    }

    probabilities_list = []
    for struc in structures:
        comp = [(sp.symbol, sp.oxi_state)
                for sp in set(struc['structure'].species)]

        if 'most_eneg_anion' not in struc:
            # Get most eneg element in struc
            els = [Element(e[0]) for e in comp]
            els.sort(key=lambda x: x.pauling_eneg, reverse=True)
            most_eneg = els[0].symbol
            if most_eneg in an_symbols:
                for poss in list_scores:
                    if poss[0] == most_eneg:
                        struc['most_eneg_anion'] = poss
            else:
                print(
                    'No data available for most electronegative anion in structure.'
                )
        try:
            anion_scores = scores_dict[struc['most_eneg_anion']]
            scores = [anion_scores[sp] for sp in comp if sp in known_species]
            overall_score = reducers[scoring](scores)
        except Exception:
            # Best effort: a missing anion, missing species score, empty
            # score list or unknown scoring mode all yield 0.  Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit through.
            if verbose:
                print('Could not get score for: {}'.format(comp))
            overall_score = 0
        if edit_struc_dict:
            struc['probability'] = overall_score
        probabilities_list.append(overall_score)

    return probabilities_list