# reduce is a builtin only on Python 2; importing it from functools works on
# both Python 2.6+ and Python 3.
from functools import reduce

import pandas as pd
# NOTE(review): sklearn.externals.joblib was dropped from recent scikit-learn
# releases; kept as-is to avoid adding a new direct dependency -- confirm the
# pinned scikit-learn version.
from sklearn.externals.joblib import Memory

# NOTE(review): `th` (thermopyl) and `cirpy` are used below but are not
# imported in this section -- presumably imported earlier in the file; confirm.

# On-disk cache used to memoize name-resolution calls between runs.
mem = Memory(cachedir="/home/bmanubay/.thermoml/")


# Cached wrapper around cirpy.resolve: forwards the identifier `x` and the
# requested representation `rtype` (e.g. "smiles") and returns cirpy's result.
# The function text itself is left untouched on purpose: joblib appears to key
# cached results on the function's source, so edits inside it may invalidate
# the existing cache -- TODO confirm.
@mem.cache
def resolve_cached(x, rtype):
    return cirpy.resolve(x, rtype)


# Compounds of most interest as decided by David, Chris and Bryce
davmollist = [
    '2,2,4-trimethylpentane',
    'cycloheptane',
    'diisopropylether',
    'isopropyl ether',
    'dimethoxymethane',
    '2,3-dimethylbutane',
    '2,2-dimethylbutane',
    '3-methylpentane',
    'neohexane',
    '4-methyl-2-pentanol',
    '2-methyl-2-pentanol',
    '1,1-diethoxyethane',
    'tert-butanol',
    'tetrahydrofuran',
    'heptane',
    'water',
    'ethanol',
    '1-butanol',
    'methyl tert-butyl ether',
]

# One row per compound name, plus the result of resolving each name with
# rtype "smiles".
S = pd.DataFrame({'IUPAC_Names': davmollist}, columns=['IUPAC_Names'])
S["SMILES"] = S.IUPAC_Names.apply(lambda x: resolve_cached(x, "smiles"))

# Load the ThermoML data as a DataFrame and remember its column labels.
df = th.pandas_dataframe()
dt = list(df.columns)

# This file confirmed to have possible data entry errors.
bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"]
df = df[~df.filename.isin(bad_filenames)]

# Property columns of interest.
experiments = [
    "Mass density, kg/m3",
    "Excess molar enthalpy (molar enthalpy of mixing), kJ/mol",
    "Excess molar heat capacity, J/K/mol",
    "Excess molar volume, m3/mol",
    "Activity coefficient",
    "Speed of sound, m/s",
    "Relative permittivity at zero frequency",
]

# Keep every row that has a value for at least one property of interest:
# collect the non-null index of each property column and take their union.
ind_list = [df[exp].dropna().index for exp in experiments]
ind = reduce(lambda x, y: x.union(y), ind_list)
# `.ix` was deprecated in pandas 0.20 and removed in 1.0. `ind` holds labels
# taken from df's own index, so label-based `.loc` selects the same rows.
df = df.loc[ind]

# Compound-name -> formula table; rows with missing values are dropped.
name_to_formula = pd.read_hdf("/home/bmanubay/.thermoml/compound_name_to_formula.h5", 'data')
name_to_formula = name_to_formula.dropna()

# Extract rows with two components
# Compounds of most interest, as decided by David, Chris and Bryce.
davmollist = [
    '2,2,4-trimethylpentane',
    'cycloheptane',
    'diisopropylether',
    'isopropyl ether',
    'dimethoxymethane',
    '2,3-dimethylbutane',
    '2,2-dimethylbutane',
    '3-methylpentane',
    'neohexane',
    '4-methyl-2-pentanol',
    '2-methyl-2-pentanol',
    '1,1-diethoxyethane',
    'tert-butanol',
    'tetrahydrofuran',
    'heptane',
    'water',
    'ethanol',
    '1-butanol',
    'methyl tert-butyl ether',
]

# Single-column frame holding the compound names listed above.
S = pd.DataFrame(davmollist, columns=['IUPAC_Names'])

# Our interesting compounds in SMILES as a df column: every name is passed
# through resolve_cached with rtype "smiles".
S["SMILES"] = S["IUPAC_Names"].apply(
    lambda name: resolve_cached(name, "smiles"))

# Pull all ThermoML data into a pandas df (as it is in your local cache) and
# keep a plain list of its column labels.
df = th.pandas_dataframe()
dt = df.columns.tolist()

# This file confirmed to have possible data entry errors; drop its rows.
bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"]
df = df[~df["filename"].isin(bad_filenames)]

# Define properties of interest
experiments = [
    "Mass density, kg/m3",
    "Speed of sound, m/s",
    "Relative permittivity at zero frequency",
    "Molar heat capacity at constant pressure, J/K/mol",
    "Molar enthalpy of vaporization or sublimation, kJ/mol",
    "Molar enthalpy, kJ/mol",
]
""" import thermopyl as th from thermopyl import thermoml_lib import cirpy import numpy as np import pandas as pd from sklearn.externals.joblib import Memory mem = Memory(cachedir="/home/bmanubay/.thermoml/") @mem.cache def resolve_cached(x, rtype): return cirpy.resolve(x, rtype) df = th.pandas_dataframe() dt = list(df.columns) bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"] # This file confirmed to have possible data entry errors. df = df[~df.filename.isin(bad_filenames)] experiments = ["Mass density, kg/m3","Speed of sound, m/s", "Relative permittivity at zero frequency", "Activity coefficient", "Specific heat capacity at constant pressure, J/K/kg", "Molar heat capacity at constant pressure, J/K/mol", "Molar heat capacity at constant volume, J/K/mol", "Molar volume, m3/mol", "Specific volume, m3/kg", "Molar enthalpy, kJ/mol"] ind_list = [df[exp].dropna().index for exp in experiments] ind = reduce(lambda x,y: x.union(y), ind_list) df = df.ix[ind] name_to_formula = pd.read_hdf("/home/bmanubay/.thermoml/compound_name_to_formula.h5", 'data') name_to_formula = name_to_formula.dropna() # Extract rows with two components