import pandas as pd
from sklearn.externals.joblib import Memory

# joblib Memory object rooted in the local ThermoML directory; it provides
# the ``cache`` decorator applied to resolve_cached below.
# NOTE(review): newer joblib releases call this argument ``location`` --
# confirm against the installed version before upgrading.
mem = Memory(cachedir="/home/bmanubay/.thermoml/")


@mem.cache
def resolve_cached(x, rtype):
    """Resolve identifier ``x`` through ``cirpy.resolve``.

    Args:
        x: Identifier handed to ``cirpy.resolve``; this script passes
            compound names.
        rtype: Requested representation, forwarded unchanged
            (this script passes "smiles").

    Returns:
        Whatever ``cirpy.resolve(x, rtype)`` returns.
    """
    resolved = cirpy.resolve(x, rtype)
    return resolved

# Compounds of most interest, as decided by David, Chris and Bryce.
# NOTE(review): a few entries look like alternate names for a single compound
# (e.g. 'diisopropylether' / 'isopropyl ether') -- presumably listed on
# purpose so that either spelling is covered; confirm.
davmollist = [
    '2,2,4-trimethylpentane',
    'cycloheptane',
    'diisopropylether',
    'isopropyl ether',
    'dimethoxymethane',
    '2,3-dimethylbutane',
    '2,2-dimethylbutane',
    '3-methylpentane',
    'neohexane',
    '4-methyl-2-pentanol',
    '2-methyl-2-pentanol',
    '1,1-diethoxyethane',
    'tert-butanol',
    'tetrahydrofuran',
    'heptane',
    'water',
    'ethanol',
    '1-butanol',
    'methyl tert-butyl ether',
]
# One row per compound of interest: the name exactly as listed in
# davmollist, plus the SMILES string that resolve_cached returns for it.
S = pd.DataFrame(
    {'IUPAC_Names': davmollist},
    columns=['IUPAC_Names'],
)
S["SMILES"] = S["IUPAC_Names"].apply(
    lambda name: resolve_cached(name, "smiles")
)


# Load the ThermoML data as a DataFrame and keep a list of its column labels.
df = th.pandas_dataframe()
dt = list(df.columns)

# This file is confirmed to have possible data entry errors, so every row
# whose ``filename`` points at it is dropped.
bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"]
df = df[~df["filename"].isin(bad_filenames)]

# Property names of interest; each one is used as a column label of df
# when the rows are filtered further down.
experiments = [
    "Mass density, kg/m3",
    "Excess molar enthalpy (molar enthalpy of mixing), kJ/mol",
    "Excess molar heat capacity, J/K/mol",
    "Excess molar volume, m3/mol",
    "Activity coefficient",
    "Speed of sound, m/s",
    "Relative permittivity at zero frequency",
]

# Keep only the rows that report at least one property of interest: for each
# property column collect the index labels of its non-null entries, then
# merge those per-property indexes into one.
ind_list = [df[exp].dropna().index for exp in experiments]
# Fold with a plain loop instead of ``reduce``: ``reduce`` is not a builtin
# on Python 3 and no import of it is visible in this part of the script.
# The loop performs the same left-to-right sequence of ``union`` calls.
ind = ind_list[0]
for other_index in ind_list[1:]:
    ind = ind.union(other_index)
# ``ind`` holds index labels taken from df itself, so label-based ``.loc``
# selects the same rows as the ``.ix`` indexer, which pandas deprecated and
# later removed.
df = df.loc[ind]

# Table read from the local HDF5 cache under key 'data' -- presumably a
# compound-name -> formula mapping, going by the file name; rows containing
# missing values are discarded.
name_to_formula = pd.read_hdf(
    "/home/bmanubay/.thermoml/compound_name_to_formula.h5", 'data')
name_to_formula = name_to_formula.dropna()

# Extract rows with two components
Beispiel #2
0

# Compounds of most interest, as decided by David, Chris and Bryce.
# NOTE(review): a few entries look like alternate names for a single compound
# (e.g. 'diisopropylether' / 'isopropyl ether') -- presumably listed on
# purpose so that either spelling is covered; confirm.
davmollist = [
    '2,2,4-trimethylpentane',
    'cycloheptane',
    'diisopropylether',
    'isopropyl ether',
    'dimethoxymethane',
    '2,3-dimethylbutane',
    '2,2-dimethylbutane',
    '3-methylpentane',
    'neohexane',
    '4-methyl-2-pentanol',
    '2-methyl-2-pentanol',
    '1,1-diethoxyethane',
    'tert-butanol',
    'tetrahydrofuran',
    'heptane',
    'water',
    'ethanol',
    '1-butanol',
    'methyl tert-butyl ether',
]
# One row per compound of interest, named exactly as in davmollist.
S = pd.DataFrame(
    {'IUPAC_Names': davmollist},
    columns=['IUPAC_Names'],
)
# Our interesting compounds in SMILES form, stored as a second df column.
S["SMILES"] = S["IUPAC_Names"].apply(
    lambda name: resolve_cached(name, "smiles")
)

# Pull all ThermoML data into a pandas DataFrame (as it is in your local
# cache) and keep a list of its column labels.
df = th.pandas_dataframe()
dt = list(df.columns)

# This file is confirmed to have possible data entry errors, so every row
# whose ``filename`` points at it is dropped.
bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"]
df = df[~df["filename"].isin(bad_filenames)]

# Properties of interest, one name per line.
experiments = [
    "Mass density, kg/m3",
    "Speed of sound, m/s",
    "Relative permittivity at zero frequency",
    "Molar heat capacity at constant pressure, J/K/mol",
    "Molar enthalpy of vaporization or sublimation, kJ/mol",
    "Molar enthalpy, kJ/mol",
]
"""

import thermopyl as th 
from thermopyl import thermoml_lib
import cirpy
import numpy as np
import pandas as pd
from sklearn.externals.joblib import Memory

# joblib Memory object rooted in the local ThermoML directory; it provides
# the ``cache`` decorator applied to resolve_cached below.
# NOTE(review): newer joblib releases call this argument ``location`` --
# confirm against the installed version before upgrading.
mem = Memory(cachedir="/home/bmanubay/.thermoml/")


@mem.cache
def resolve_cached(x, rtype):
    """Resolve identifier ``x`` through ``cirpy.resolve``.

    Args:
        x: Identifier handed to ``cirpy.resolve`` unchanged.
        rtype: Requested representation, forwarded unchanged.

    Returns:
        Whatever ``cirpy.resolve(x, rtype)`` returns.
    """
    resolved = cirpy.resolve(x, rtype)
    return resolved

# Load the ThermoML data as a DataFrame and keep a list of its column labels.
df = th.pandas_dataframe()
dt = list(df.columns)

# This file is confirmed to have possible data entry errors, so every row
# whose ``filename`` points at it is dropped.
bad_filenames = ["/home/bmanubay/.thermoml/j.fluid.2013.12.014.xml"]
df = df[~df["filename"].isin(bad_filenames)]

# Property names of interest; each one is used as a column label of df
# when the rows are filtered further down.
experiments = [
    "Mass density, kg/m3",
    "Speed of sound, m/s",
    "Relative permittivity at zero frequency",
    "Activity coefficient",
    "Specific heat capacity at constant pressure, J/K/kg",
    "Molar heat capacity at constant pressure, J/K/mol",
    "Molar heat capacity at constant volume, J/K/mol",
    "Molar volume, m3/mol",
    "Specific volume, m3/kg",
    "Molar enthalpy, kJ/mol",
]

# Keep only the rows that report at least one property of interest: for each
# property column collect the index labels of its non-null entries, then
# merge those per-property indexes into one.
ind_list = [df[exp].dropna().index for exp in experiments]
# Fold with a plain loop instead of ``reduce``: ``reduce`` is not a builtin
# on Python 3 and the import block above does not bring it in. The loop
# performs the same left-to-right sequence of ``union`` calls.
ind = ind_list[0]
for other_index in ind_list[1:]:
    ind = ind.union(other_index)
# ``ind`` holds index labels taken from df itself, so label-based ``.loc``
# selects the same rows as the ``.ix`` indexer, which pandas deprecated and
# later removed.
df = df.loc[ind]

# Table read from the local HDF5 cache under key 'data' -- presumably a
# compound-name -> formula mapping, going by the file name; rows containing
# missing values are discarded.
name_to_formula = pd.read_hdf(
    "/home/bmanubay/.thermoml/compound_name_to_formula.h5", 'data')
name_to_formula = name_to_formula.dropna()

# Extract rows with two components