예제 #1
0
    def gen_atomic_df(self, composition_df):
        """Convert a compound-based composition table into atomic fractions.

        Each column name of ``composition_df`` is parsed as a chemical formula
        with ``parse_formula``. The amount of every compound is distributed
        over its constituent elements, and each row of the result is then
        divided by its own sum.

        Parameters:
        composition_df (DataFrame): one column per compound formula; the
            values are the compound amounts for each row.

        Returns:
        DataFrame: same index as ``composition_df``, one column per column of
            ``self.chemical_attributes``, rows normalised by their sum.

        Raises:
        ValueError: if a parsed element is not a column of
            ``self.chemical_attributes`` (raised by ``list.index``).
        """
        compounds = composition_df.columns.tolist()
        all_elements = self.chemical_attributes.columns.tolist()
        n_elements = len(all_elements)

        # stoichiometry[e, c]: amount of element e in one unit of compound c.
        stoichiometry = np.zeros((n_elements, len(compounds)))
        for col, compound in enumerate(compounds):
            for symbol, amount in parse_formula(compound).items():
                stoichiometry[all_elements.index(symbol), col] += amount

        # Accumulate every compound's contribution to each element column.
        totals = np.zeros((len(composition_df), n_elements))
        for col, compound in enumerate(compounds):
            for symbol in parse_formula(compound):
                row = all_elements.index(symbol)
                contribution = composition_df[compound].values * stoichiometry[row, col]
                totals[:, row] += contribution

        frame = pd.DataFrame(
            totals,
            columns=all_elements,
            index=composition_df.index,
        )
        row_sums = frame.sum(axis=1)
        return frame.div(row_sums, axis=0)
def preprocess_data(csv_path='training_data.csv', training_size=50000, num_epochs=30):
    """Load labelled formulas, vectorise them per element and train a classifier.

    The CSV must provide a ``Formula`` column (chemical formula strings) and an
    ``Is_Candidate`` column (castable to int). Every formula becomes a vector
    with one entry per element symbol holding that element's count. The
    shuffled samples are split into the first ``training_size`` rows for
    training and the remaining rows for validation.

    Parameters:
    csv_path (str): path of the CSV file to read.
    training_size (int): number of shuffled samples used for training; every
        sample after that is used as validation data.
    num_epochs (int): number of training epochs.

    Returns:
    None

    Raises:
    ValueError: if the file contains no string formulas.
    """
    # Read the whole CSV into memory.
    reader = pd.read_csv(csv_path)

    # Pair formulas with labels before shuffling so both stay aligned.
    formulas = list(reader['Formula'].to_numpy())
    labels = list(reader['Is_Candidate'].to_numpy().astype(int))
    data = list(zip(formulas, labels))
    random.shuffle(data)

    # Feature columns: one per element symbol. The first table entry is
    # dropped (presumably the neutron placeholder -- confirm against
    # periodictable).
    elements = [str(element.symbol) for element in periodictable.elements][1:]

    parsed_formulas = []
    kept_labels = []
    for formula, label in data:
        if not isinstance(formula, str):
            # Non-string cells are skipped; the label is skipped with them so
            # features and labels keep matching row for row.
            continue
        counts = dict.fromkeys(elements, 0)
        # NOTE(review): a symbol that is not in `elements` adds an extra key
        # and therefore a longer row -- confirm the data never contains one.
        counts.update(chemparse.parse_formula(formula))
        parsed_formulas.append(list(counts.values()))
        kept_labels.append(label)

    if not parsed_formulas:
        raise ValueError("no string formulas found in " + str(csv_path))

    print(parsed_formulas[0])

    # Train on the head of the shuffled data, validate on the disjoint tail.
    training_formulas = np.array(parsed_formulas[:training_size])
    training_labels = np.array(kept_labels[:training_size])
    testing_formulas = np.array(parsed_formulas[training_size:])
    testing_labels = np.array(kept_labels[training_size:])

    n_features = len(training_formulas[0])
    print(n_features)

    model = tf.keras.Sequential([
        # The shape must be a tuple; "(n)" without the comma is a plain int.
        tf.keras.layers.InputLayer(input_shape=(n_features,)),
        tf.keras.layers.Dense(24, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.summary()

    # With fewer than training_size + 1 samples there is nothing left to
    # validate on; train without validation data in that case.
    validation = None
    if len(testing_formulas) > 0:
        validation = (testing_formulas, testing_labels)

    model.fit(
        training_formulas,
        training_labels,
        epochs=num_epochs,
        validation_data=validation,
        verbose=2,
    )
예제 #3
0
def parse_formula(formula):
    """Parse a formula with chemparse and repair over-long element keys.

    Any key longer than two characters in the chemparse result is handed to
    ``parse_segment``; the key is removed and the entries returned by
    ``parse_segment`` are written into the result in its place.

    Parameters:
    formula (str): the chemical formula to parse.

    Returns:
    dict: element key -> amount.
    """
    parsed = chemparse.parse_formula(formula)
    # Decide which keys need repair up front, because the dict is mutated below.
    suspicious = [key for key in parsed if len(key) > 2]
    for key in suspicious:
        replacement = parse_segment(key)
        del parsed[key]
        for symbol in replacement:
            parsed[symbol] = replacement[symbol]
    return parsed
예제 #4
0
    def parseFormulaNorm(formula, sum_total=1, round_=False):
        """Parse a formula and rescale its amounts so that they sum to ``sum_total``.

        Parameters:
        formula (str): formula handed to ``parse_formula``.
        sum_total (number): target sum of the returned amounts. It is applied
            whether or not rounding is requested.
        round_ (int or False): number of decimals to round each amount to.
            Any falsy value (``False``, ``0``, ``None``) disables rounding.

        Returns:
        dict: element -> rescaled amount.

        Raises:
        ZeroDivisionError: if the parsed amounts sum to zero (for example an
            empty formula).
        """
        dic = parse_formula(formula)
        sum_values = sum(dic.values())

        # Scale first so both the rounded and the unrounded result honour
        # sum_total; with the default sum_total=1 this is exactly v / sum_values.
        norm_dic = {k: v * sum_total / sum_values for k, v in dic.items()}

        if round_:
            norm_dic = {k: round(v, round_) for k, v in norm_dic.items()}

        return norm_dic
def main():
    """Report formulas in the IMA label file that contain unusual characters.

    Reads ``Data/IMA_abiotic_labels.csv`` and checks every entry of the
    ``IMA Chemistry (plain)`` column:

    * formulas containing ``+`` are not reported;
    * otherwise, formulas containing ``-`` are printed and counted as "dash";
    * otherwise, formulas matching the extra-character pattern are printed
      together with their chemparse result and counted as "extra".

    Both counters are printed at the end.

    Returns:
    None
    """
    # Raw strings keep the exact same patterns while avoiding the
    # invalid-escape warnings that '\d' and '\+' trigger in normal literals.
    # Inside a character class \b is a backspace, so this class excludes
    # digits, ASCII letters, parentheses, backspace, 'o' and 'x'.
    # NOTE(review): "\box" looks like it was meant to allow a literal token --
    # confirm the intended pattern.
    extra_chars = re.compile(r'[^\da-zA-Z()\box]+')

    df = pd.read_csv("Data/IMA_abiotic_labels.csv")
    formulas = df["IMA Chemistry (plain)"].tolist()

    dash_count = 0
    extra_count = 0
    for f in formulas:
        if '+' in f:
            # Strip "<digit>+" charge markers. The stripped formula is not
            # used further; it is kept for inspection while debugging.
            f = re.sub(r'\d\+', '', f)
        elif '-' in f:
            print(f)
            dash_count += 1
        elif extra_chars.findall(f):
            print(f)
            print(chemparse.parse_formula(f))
            extra_count += 1
    print("Dash count:", dash_count)
    print("Extra count:", extra_count)
# Crystal System Prediction
 Integrating Concepts Of Chemistry & Computer Science 
''')

# Parse every formula into a per-element mapping, expand those mappings into
# one column per element, and use 0 where a formula lacks an element.
chem_data = data.Formula.apply(chemparse.parse_formula)
chem_data = pd.json_normalize(chem_data)
chem_data = chem_data.fillna(0)

# Attach the per-element columns to the original table.
# NOTE(review): join aligns on the index and json_normalize returns a fresh
# default index -- confirm `data` also uses a default index.
data = data.join(chem_data)

# Show the combined table and its summary statistics on the page.
st.subheader("Data Information")
st.dataframe(data)
st.write(data.describe())

# Read a formula from the sidebar; the name is then rebound to the parsed
# result of chemparse.parse_formula instead of the raw text.
user_input = st.sidebar.text_input("Enter The Formula Of The Compound")
user_input = chemparse.parse_formula(user_input)

# Fixed list of element symbols; how it is used lies outside this excerpt
# (presumably the element feature columns -- confirm).
element_list = [
    'K', 'S', 'O', 'Al', 'Fe', 'H', 'N', 'Ce', 'C', 'Cl', 'B', 'Cu', 'Ba',
    'Ca', 'Co', 'Pb', 'Mn', 'Mg', 'Hg', 'Ni', 'Cr', 'Sr', 'Na', 'Zn', 'Ag',
    'I', 'P'
]

user_data = {}

# Sidebar slider from 273.0 to 2000.0 with default 664.1; the chosen value is
# truncated to an int before being stored.
melting_point = {
    "Melting Point":
    int(st.sidebar.slider("Melting Point (in K)", 273.0, 2000.0, 664.1))
}
solubility = {
    "Solubility":
예제 #7
0
from mendeleev import element
import chemparse

tempElem = element("H")
tempMult = 0
totalValence = 0
counter = 0
inputThing = ""

# Interactive loop: read a formula, print the sum over its elements of
# (valence electrons * amount), and repeat until the user enters "0".
while True:
    try:
        inputThing = input("Enter Formula:")
    except EOFError:
        # stdin was closed; retrying would loop forever, so stop instead.
        break

    if inputThing == "0":
        # "0" is the quit sentinel, not a formula to evaluate.
        break

    # Start every formula from zero so that a failure halfway through one
    # formula cannot leak a partial sum into the next result.
    totalValence = 0
    try:
        formula = chemparse.parse_formula(inputThing)
        for k, v in formula.items():
            tempElem = element(k)
            tempMult = v
            totalValence = (tempElem.nvalence() * tempMult) + totalValence
    except Exception:
        # Best-effort prompt: any parsing or lookup failure just asks again.
        # KeyboardInterrupt is deliberately not caught so Ctrl-C still exits.
        print("Error! Try Again.")
    else:
        print(totalValence)
예제 #8
0
def initialize_properties_file(a, ai, id, d, ma):
    """Initialize the properties file of a material with titles and main structure.

    Writes ``property_calculations/properties_<id>.txt`` containing the
    material identification, the element symbols and site positions of the
    initial unit cell, and the column titles and units of the property table.
    The ``property_calculations`` directory must already exist.

    Parameters:
    a (obj): atoms object (class defined in ase) of the material.
    ai (obj): initial atoms object (class defined in ase); the unit cell
            atoms object that md runs for.
    id (str): a special number identifying the material system.
    d (int): number of decimals for the site positions; it also sets the
            default column width used when printing to the file.
    ma (boolean): indicates whether the material is monoatomic; if so, two
            extra columns (DebyeT, Lindemann) are added.

    Returns:
    None
    """

    # Help function for formatting: one leading space, then the text
    # left-justified to a width of k + 6.
    def lj(text, k=d):
        return " " + text.ljust(k + 6)

    # The context manager closes the file even if a write or lookup fails.
    with open("property_calculations/properties_" + id + ".txt", "w+") as out:
        out.write("Material ID: " + id + "\n")
        out.write("Unit cell composition: " + a.get_chemical_formula() + "\n")
        chem_formula = a.get_chemical_formula(mode='hill', empirical=True)
        out.write("Material:  " + chem_formula + "\n")

        # Write the elements as title, one column per site:
        # {"Al": 1, "Mg": 3} gives ["Al", "Mg", "Mg", "Mg"].
        out.write("Site positions of initial unit cell:" + "\n")
        counts = chemparse.parse_formula(ai.get_chemical_formula())
        site_symbols = [
            symbol for symbol, amount in counts.items()
            for _ in range(int(amount))
        ]
        for symbol in site_symbols:
            out.write(lj(symbol))

        # Write the site positions: one line per Cartesian component,
        # one column per site, d decimals each.
        positions = ai.get_positions()
        coord_format = "." + str(d) + "f"
        for component in range(3):
            out.write("\n")
            for value in positions[:, component]:
                out.write(lj(format(value, coord_format)))

        out.write("\n")
        out.write("Properties:\n")

        # Column titles of the property table.
        out.write(
            lj("Time") + lj("Epot") + lj("Ekin") + lj("Etot") + lj("Temp", 2) +
            lj("MSD"))
        out.write(lj("Self_diff") + lj("LC_a", 3) + lj("LC_b", 3) + lj("LC_c", 3))
        out.write(lj("Volume") + lj("Pressure"))
        if ma:
            out.write(lj("DebyeT", 2) + lj("Lindemann"))
        out.write("\n")

        # Units, aligned with the titles above.
        out.write(
            lj("fs") + lj("eV/atom") + lj("eV/atom") + lj("eV/atom") + lj("K", 2) +
            lj("Å^2"))
        out.write(lj("mm^2/s") + lj("Å", 3) + lj("Å", 3) + lj("Å", 3))
        out.write(lj("Å^3/atom") + lj("GPa"))
        if ma:
            out.write(lj("K", 2) + lj("1"))
        out.write("\n")
    return