def load_economic_data():
    '''Load the economic-status data sets into module-level globals.

    Populates `HPV_data`, `_ECHATonnageDict` and `_EPACDRDict` from the data
    files found in `folder`. If `HPV_data` is already set (not None) the
    function returns immediately without touching anything.

    Returns
    -------
    None

    Notes
    -----
    `HPV_data` doubles as the "already loaded" flag. All three data sets are
    therefore built in local variables and only published to the globals
    once every file has been read; if any read raises, the flag stays unset
    and the next call retries instead of leaving the two dictionaries
    undefined.
    '''
    global HPV_data, _EPACDRDict, _ECHATonnageDict
    if HPV_data is not None:
        return None

    # OECD high production volume chemicals: chemicals produced by OECD
    # members in > 1000 tonnes/year.
    # 13061-29-2 not valid and removed
    hpv_table = pd.read_csv(os.path.join(folder, 'HPV 2015 March 3.csv'),
                            sep='\t', index_col=0)

    # CAS -> list of distinct tonnage bands, in order of first appearance.
    echa_bands = {}
    with zipfile.ZipFile(os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')) as z:
        with z.open(z.namelist()[0]) as f:
            for line in f:
                # Archive members are read as bytes, so each line has to be
                # decoded to text before it can be split.
                CAS, band = line.decode("utf-8").strip('\n').split('\t')
                bands = echa_bands.setdefault(CAS, [])
                if band not in bands:
                    bands.append(band)

    # EPA summed reported chemical usages. In metric tonnes/year after
    # conversion. Many producers keep their data confidential. This was
    # originally in terms of lb/year, but rounded to the nearest kg.
    epa_cdr = {}
    with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f:
        next(f)  # discard the first line (presumably the column header)
        for line in f:
            values = line.rstrip().split('\t')
            CAS, manufactured, imported, exported = to_num(values)
            epa_cdr[CAS] = {"Manufactured": manufactured/1000.,
                            "Imported": imported/1000.,
                            "Exported": exported/1000.}

    # Publish only after everything loaded; HPV_data (the flag) goes last.
    _ECHATonnageDict = echa_bands
    _EPACDRDict = epa_cdr
    HPV_data = hpv_table
# Read in a dict of TWAs, STELs, and Ceiling Limits, keyed by CASRN.
# The data source is the Ontario Labor Website. They have obtained their data
# in part from their own reviews, and also from ACGIH.
# Warning: The lowest value is taken, when multiple units or different forms
# of a compound are listed.
# Note that each province has a different set of values, but these serve as
# general values.
_OntarioExposureLimits = {}
with open(os.path.join(folder, 'Ontario Exposure Limits.csv'), encoding='utf-8') as f:
    next(f)  # discard the first line (presumably the column header)
    for line in f:
        values = to_num(line.strip('\n').split('\t'))
        if not values[0]:
            # Rows without any CASRN cannot be keyed; skip them.
            continue
        _skin = values[5] == 'Skin'
        # One row may list several CASRNs separated by ';'; each one gets
        # its own entry carrying the same name and skin flag.
        for CASRN in values[0].split(';'):
            # Strip once and use the cleaned identifier both for the unit
            # conversion and as the dictionary key, so that "a; b" is stored
            # under "b" rather than the unreachable " b".
            CASRN = CASRN.strip()
            _ppm_TWA, _mgm3_TWA = str_to_ppm_mgm3(values[2], CASRN)
            _ppm_STEL, _mgm3_STEL = str_to_ppm_mgm3(values[3], CASRN)
            _ppm_C, _mgm3_C = str_to_ppm_mgm3(values[4], CASRN)
            _OntarioExposureLimits[CASRN] = {
                "Name": values[1],
                "TWA (ppm)": _ppm_TWA, "TWA (mg/m^3)": _mgm3_TWA,
                "STEL (ppm)": _ppm_STEL, "STEL (mg/m^3)": _mgm3_STEL,
                "Ceiling (ppm)": _ppm_C, "Ceiling (mg/m^3)": _mgm3_C,
                "Skin": _skin}
#TODO: Add CRC exposure limits. Note that functions should be used.
else: _ECHATonnageDict[CAS].append(band) else: _ECHATonnageDict[CAS] = [band] _EPACDRDict = {} with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f: '''EPA summed reported chemical usages. In metric tonnes/year after conversion. Many producers keep their date confidential. This was originally in terms of lb/year, but rounded to the nearest kg. ''' next(f) for line in f: values = line.rstrip().split('\t') CAS, manufactured, imported, exported = to_num(values) _EPACDRDict[CAS] = { "Manufactured": manufactured / 1000., "Imported": imported / 1000., "Exported": exported / 1000. } #EPACDR_data = pd.read_csv(os.path.join(folder,'EPA Chemical Data Reporting - 2012.csv.gz'), sep='\t', # index_col=0, dtype={'CASRN': np.int32, 'Domestic': np.float64, 'Imported': np.float64, # 'Exported': np.float64}, compression='gzip') EPACDR = 'EPA Chemical Data Reporting (2012)' ECHA = 'European Chemicals Agency Total Tonnage Bands' OECD = 'OECD high production volume chemicals' economic_status_methods = [EPACDR, ECHA, OECD]
>NH (ring) 0.0130 0.0114 29 52.82 101.51 31.65 75.61 1.18E+1 -2.30E-2 1.07E-4 -6.28E-8 7.490 6.930 n. a. n. a. >N- (nonring) 0.0169 0.0074 9 11.74 48.84 123.34 163.16 -3.11E+1 2.27E-1 -3.20E-4 1.46E-7 4.703 1.896 n. a. n. a. -N= (nonring) 0.0255 -0.0099 n. a. 74.60 n. a. 23.61 n. a. n. a. n. a. n. a. n. a. n. a. 3.335 n. a. n. a. -N= (ring) 0.0085 0.0076 34 57.55 68.40 55.52 79.93 8.83 -3.84E-3 4.35E-5 -2.60E-8 3.649 6.528 n. a. n. a. =NH n. a. n. a. n. a. 83.08 68.91 93.70 119.66 5.69 -4.12E-3 1.28E-4 -8.88E-8 n. a. 12.169 n. a. n. a. -CN 0.0496 -0.0101 91 125.66 59.89 88.43 89.22 3.65E+1 -7.33E-2 1.84E-4 -1.03E-7 2.414 12.851 n. a. n. a. -NO2 0.0437 0.0064 91 152.54 127.24 -66.57 -16.83 2.59E+1 -3.74E-3 1.29E-4 -8.88E-8 9.679 16.738 n. a. n. a. -SH 0.0031 0.0084 63 63.56 20.09 -17.33 -22.99 3.53E+1 -7.58E-2 1.85E-4 -1.03E-7 2.360 6.884 n. a. n. a. -S- (nonring) 0.0119 0.0049 54 68.78 34.40 41.87 33.12 1.96E+1 -5.61E-3 4.02E-5 -2.76E-8 4.130 6.817 n. a. n. a. -S- (ring) 0.0019 0.0051 38 52.10 79.93 39.10 27.76 1.67E+1 4.81E-3 2.77E-5 -2.11E-8 1.557 5.984 n. a. n. a.''' joback_groups_str_dict = {} joback_groups_id_dict = {} JOBACK = namedtuple('JOBACK', 'i, name, Tc, Pc, Vc, Tb, Tm, Hform, Gform, Cpa, Cpb, Cpc, Cpd, Hfus, Hvap, mua, mub') for i, line in enumerate(joback_data_txt.split('\n')): parsed = to_num(line.split('\t')) j = JOBACK(i+1, *parsed) joback_groups_str_dict[parsed[0]] = j joback_groups_id_dict[i+1] = j def smarts_fragment(catalog, rdkitmol=None, smi=None): r'''Fragments a molecule into a set of unique groups and counts as specified by the `catalog`. The molecule can either be an rdkit molecule object, or a smiles string which will be parsed by rdkit. Returns a dictionary of groups and their counts according to the indexes of the catalog provided. Parameters ---------- catalog : dict
from scipy.interpolate import interp1d import pandas as pd F = e * N_A folder = os.path.join(os.path.dirname(__file__), 'Electrolytes') _Laliberte_Density_ParametersDict = {} _Laliberte_Viscosity_ParametersDict = {} _Laliberte_Heat_Capacity_ParametersDict = {} # Do not re-implement with Pandas, as current methodology uses these dicts in each function with open(os.path.join(folder, 'Laliberte2009.tsv')) as f: next(f) for line in f: values = to_num(line.split('\t')) _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[ 0:13] if c0: _Laliberte_Density_ParametersDict[CASRN] = { "Name": _name, "Formula": _formula, "MW": _MW, "C0": c0, "C1": c1, "C2": c2, "C3": c3, "C4": c4, "Tmin": Tmin, "Tmax": Tmax,
_ECHATonnageDict[CAS].append(band) else: _ECHATonnageDict[CAS] = [band] _EPACDRDict = {} with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f: '''EPA summed reported chemical usages. In metric tonnes/year after conversion. Many producers keep their date confidential. This was originally in terms of lb/year, but rounded to the nearest kg. ''' next(f) for line in f: values = line.rstrip().split('\t') CAS, manufactured, imported, exported = to_num(values) _EPACDRDict[CAS] = {"Manufactured": manufactured/1000., "Imported": imported/1000., "Exported": exported/1000.} #EPACDR_data = pd.read_csv(os.path.join(folder,'EPA Chemical Data Reporting - 2012.csv.gz'), sep='\t', # index_col=0, dtype={'CASRN': np.int32, 'Domestic': np.float64, 'Imported': np.float64, # 'Exported': np.float64}, compression='gzip') EPACDR = 'EPA Chemical Data Reporting (2012)' ECHA = 'European Chemicals Agency Total Tonnage Bands' OECD = 'OECD high production volume chemicals' def economic_status(CASRN, Method=None, AvailableMethods=False): # pragma: no cover '''Look up the economic status of a chemical.
-N= (ring) 0.0085 0.0076 34 57.55 68.40 55.52 79.93 8.83 -3.84E-3 4.35E-5 -2.60E-8 3.649 6.528 n. a. n. a. =NH n. a. n. a. n. a. 83.08 68.91 93.70 119.66 5.69 -4.12E-3 1.28E-4 -8.88E-8 n. a. 12.169 n. a. n. a. -CN 0.0496 -0.0101 91 125.66 59.89 88.43 89.22 3.65E+1 -7.33E-2 1.84E-4 -1.03E-7 2.414 12.851 n. a. n. a. -NO2 0.0437 0.0064 91 152.54 127.24 -66.57 -16.83 2.59E+1 -3.74E-3 1.29E-4 -8.88E-8 9.679 16.738 n. a. n. a. -SH 0.0031 0.0084 63 63.56 20.09 -17.33 -22.99 3.53E+1 -7.58E-2 1.85E-4 -1.03E-7 2.360 6.884 n. a. n. a. -S- (nonring) 0.0119 0.0049 54 68.78 34.40 41.87 33.12 1.96E+1 -5.61E-3 4.02E-5 -2.76E-8 4.130 6.817 n. a. n. a. -S- (ring) 0.0019 0.0051 38 52.10 79.93 39.10 27.76 1.67E+1 4.81E-3 2.77E-5 -2.11E-8 1.557 5.984 n. a. n. a.''' joback_groups_str_dict = {} joback_groups_id_dict = {} JOBACK = namedtuple( 'JOBACK', 'i, name, Tc, Pc, Vc, Tb, Tm, Hform, Gform, Cpa, Cpb, Cpc, Cpd, Hfus, Hvap, mua, mub' ) for i, line in enumerate(joback_data_txt.split('\n')): parsed = to_num(line.split('\t')) j = JOBACK(i + 1, *parsed) joback_groups_str_dict[parsed[0]] = j joback_groups_id_dict[i + 1] = j def smarts_fragment(catalog, rdkitmol=None, smi=None): r'''Fragments a molecule into a set of unique groups and counts as specified by the `catalog`. The molecule can either be an rdkit molecule object, or a smiles string which will be parsed by rdkit. Returns a dictionary of groups and their counts according to the indexes of the catalog provided. Parameters ---------- catalog : dict
# Tab-separated reference tables loaded with pandas.
CRC_aqueous_thermodynamics = pd.read_csv(os.path.join(folder, 'CRC Thermodynamic Properties of Aqueous Ions.csv'), sep='\t', index_col=0)
electrolyte_dissociation_reactions = pd.read_csv(os.path.join(folder, 'Electrolyte dissociations.csv'), sep='\t')

# Laliberte correlation parameters, one dict per property, keyed by CASRN.
_Laliberte_Density_ParametersDict = {}
_Laliberte_Viscosity_ParametersDict = {}
_Laliberte_Heat_Capacity_ParametersDict = {}

# Do not re-implement with Pandas, as current methodology uses these dicts in each function
with open(os.path.join(folder, 'Laliberte2009.tsv')) as f:
    next(f)  # discard the first line (presumably the column header)
    for line in f:
        values = to_num(line.split('\t'))

        # Fields 0-12: identification plus the density coefficients and
        # their validity limits.
        _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[0:13]
        # A falsy first coefficient means no entry is stored for this
        # property; the same test is applied to each group below.
        if c0:
            _Laliberte_Density_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "C0": c0, "C1": c1, "C2": c2, "C3": c3, "C4": c4,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}

        # Fields 13-22: viscosity coefficients and limits. Tmin/Tmax/wMax
        # are deliberately rebound to this group's own limits.
        v1, v2, v3, v4, v5, v6, Tmin, Tmax, wMax, pts = values[13:23]
        if v1:
            _Laliberte_Viscosity_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "V1": v1, "V2": v2, "V3": v3, "V4": v4, "V5": v5, "V6": v6,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}

        # Fields 23-32: heat capacity coefficients and limits. Exactly ten
        # fields are unpacked, so the slice must stop at 33; a wider slice
        # fails to unpack on any row that carries an extra trailing field.
        a1, a2, a3, a4, a5, a6, Tmin, Tmax, wMax, pts = values[23:33]
        if a1:
            _Laliberte_Heat_Capacity_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "A1": a1, "A2": a2, "A3": a3, "A4": a4, "A5": a5, "A6": a6,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}
# Product of the constants `e` and `N_A` (both defined outside this block).
F = e*N_A

# Directory holding the electrolyte data files, next to this module.
folder = os.path.join(os.path.dirname(__file__), 'Electrolytes')

# Laliberte correlation parameters, one dict per property, keyed by CASRN.
_Laliberte_Density_ParametersDict = {}
_Laliberte_Viscosity_ParametersDict = {}
_Laliberte_Heat_Capacity_ParametersDict = {}

# Do not re-implement with Pandas, as current methodology uses these dicts in each function
with open(os.path.join(folder, 'Laliberte2009.csv')) as f:
    next(f)  # discard the first line (presumably the column header)
    for line in f:
        values = to_num(line.split('\t'))

        # Fields 0-12: identification plus the density coefficients and
        # their validity limits.
        _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[0:13]
        # A falsy first coefficient means no entry is stored for this
        # property; the same test is applied to each group below.
        if c0:
            _Laliberte_Density_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "C0": c0, "C1": c1, "C2": c2, "C3": c3, "C4": c4,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}

        # Fields 13-22: viscosity coefficients and limits. Tmin/Tmax/wMax
        # are deliberately rebound to this group's own limits.
        v1, v2, v3, v4, v5, v6, Tmin, Tmax, wMax, pts = values[13:23]
        if v1:
            _Laliberte_Viscosity_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "V1": v1, "V2": v2, "V3": v3, "V4": v4, "V5": v5, "V6": v6,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}

        # Fields 23-32: heat capacity coefficients and limits. Exactly ten
        # fields are unpacked, so the slice must stop at 33; a wider slice
        # fails to unpack on any row that carries an extra trailing field.
        a1, a2, a3, a4, a5, a6, Tmin, Tmax, wMax, pts = values[23:33]
        if a1:
            _Laliberte_Heat_Capacity_ParametersDict[CASRN] = {
                "Name": _name, "Formula": _formula, "MW": _MW,
                "A1": a1, "A2": a2, "A3": a3, "A4": a4, "A5": a5, "A6": a6,
                "Tmin": Tmin, "Tmax": Tmax, "wMax": wMax}