Beispiel #1
0
def load_economic_data():
    '''Load the economic data sets into this module's globals.

    On the first call, three files are read from `folder` and the results
    are stored in the globals `HPV_data`, `_ECHATonnageDict` and
    `_EPACDRDict`. If `HPV_data` is already set (is not None) the function
    returns immediately without reading anything.

    Returns
    -------
    None

    Notes
    -----
    * `HPV_data` is the tab-separated file 'HPV 2015 March 3.csv' read with
      pandas, indexed by its first column. Per the original author, the
      OECD list covers chemicals produced by OECD members in quantities
      above 1000 tonnes/year.
    * `_ECHATonnageDict` maps a CAS string to the list of distinct tonnage
      bands found for it, in order of first appearance.
    * `_EPACDRDict` maps a CAS value to a dict with the keys "Manufactured",
      "Imported" and "Exported"; each stored amount is the parsed value
      divided by 1000.
    '''
    global HPV_data, _EPACDRDict, _ECHATonnageDict
    if HPV_data is not None:
        return None

    # 13061-29-2 not valid and removed
    hpv_path = os.path.join(folder, 'HPV 2015 March 3.csv')
    HPV_data = pd.read_csv(hpv_path, sep='\t', index_col=0)

    _ECHATonnageDict = {}
    echa_path = os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')
    with zipfile.ZipFile(echa_path) as archive:
        # Only the first member of the archive is read.
        with archive.open(archive.namelist()[0]) as member:
            for raw in member:
                # The member is opened in binary mode, so every line has to
                # be decoded to text before it can be split on tabs.
                CAS, band = raw.decode("utf-8").strip('\n').split('\t')
                known_bands = _ECHATonnageDict.setdefault(CAS, [])
                if band not in known_bands:
                    known_bands.append(band)

    # EPA summed reported chemical usages. In metric tonnes/year after
    # conversion. Many producers keep their data confidential.
    # This was originally in terms of lb/year, but rounded to the nearest kg.
    _EPACDRDict = {}
    epa_path = os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')
    with open(epa_path) as epa_file:
        next(epa_file)  # the first line of the file is skipped, not parsed
        for row in epa_file:
            CAS, manufactured, imported, exported = to_num(
                row.rstrip().split('\t'))
            _EPACDRDict[CAS] = dict(
                Manufactured=manufactured / 1000.,
                Imported=imported / 1000.,
                Exported=exported / 1000.,
            )
Beispiel #2
0
# Maps a CASRN string to a dict of exposure limits; filled by the loader below.
_OntarioExposureLimits = {}


# Read in a dict of TWAs, STELs, and Ceiling Limits. The data source
# is the Ontario Labor Website. They have obtained their data in part from
# their own reviews, and also from ACGIH.
# Warning: The lowest value is taken, when multiple units or different forms
#          of a compound are listed.
# Note that each province has a different set of values, but these serve
# as general values.
with open(os.path.join(folder, 'Ontario Exposure Limits.csv'), encoding='utf-8') as f:
    next(f)  # the first line of the file is skipped, not parsed
    for line in f:
        values = to_num(line.strip('\n').split('\t'))
        if not values[0]:
            # Rows without any CASRN in the first column are ignored.
            continue
        _skin = values[5] == 'Skin'
        # The first column may hold several CASRNs separated by ';'. Each one
        # is stripped once, and the stripped value is used both for the unit
        # conversion and as the dictionary key, so that an entry written as
        # "a; b" is stored under "b" rather than " b".
        for CASRN in (part.strip() for part in values[0].split(';')):
            _ppm_TWA, _mgm3_TWA = str_to_ppm_mgm3(values[2], CASRN)
            _ppm_STEL, _mgm3_STEL = str_to_ppm_mgm3(values[3], CASRN)
            _ppm_C, _mgm3_C = str_to_ppm_mgm3(values[4], CASRN)
            _OntarioExposureLimits[CASRN] = {
                "Name": values[1],
                "TWA (ppm)": _ppm_TWA,
                "TWA (mg/m^3)": _mgm3_TWA,
                "STEL (ppm)": _ppm_STEL,
                "STEL (mg/m^3)": _mgm3_STEL,
                "Ceiling (ppm)": _ppm_C,
                "Ceiling (mg/m^3)": _mgm3_C,
                "Skin": _skin,
            }

#TODO: Add CRC exposure limits. Note that functions should be used.
Beispiel #3
0
                else:
                    _ECHATonnageDict[CAS].append(band)
            else:
                _ECHATonnageDict[CAS] = [band]

# EPA summed reported chemical usages. In metric tonnes/year after conversion.
# Many producers keep their data confidential.
# This was originally in terms of lb/year, but rounded to the nearest kg.
_EPACDRDict = {}
with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f:
    next(f)  # the first line of the file is skipped, not parsed
    for line in f:
        values = line.rstrip().split('\t')
        CAS, manufactured, imported, exported = to_num(values)
        # Every amount is stored as the parsed value divided by 1000.
        _EPACDRDict[CAS] = dict(
            Manufactured=manufactured / 1000.,
            Imported=imported / 1000.,
            Exported=exported / 1000.,
        )
#EPACDR_data = pd.read_csv(os.path.join(folder,'EPA Chemical Data Reporting - 2012.csv.gz'), sep='\t',
#                       index_col=0, dtype={'CASRN': np.int32, 'Domestic': np.float64, 'Imported': np.float64,
#                                          'Exported': np.float64},  compression='gzip')

# Display names of the three data sources.
EPACDR = 'EPA Chemical Data Reporting (2012)'
ECHA = 'European Chemicals Agency Total Tonnage Bands'
OECD = 'OECD high production volume chemicals'

# All source names, in the order EPACDR, ECHA, OECD.
economic_status_methods = [EPACDR, ECHA, OECD]
Beispiel #4
0
>NH (ring) 	0.0130 	0.0114 	29 	52.82 	101.51 	31.65 	75.61 	1.18E+1 	-2.30E-2 	1.07E-4 	-6.28E-8 	7.490 	6.930 	n. a. 	n. a.
>N- (nonring) 	0.0169 	0.0074 	9 	11.74 	48.84 	123.34 	163.16 	-3.11E+1 	2.27E-1 	-3.20E-4 	1.46E-7 	4.703 	1.896 	n. a. 	n. a.
-N= (nonring) 	0.0255 	-0.0099 	n. a. 	74.60 	n. a. 	23.61 	n. a. 	n. a. 	n. a. 	n. a. 	n. a. 	n. a. 	3.335 	n. a. 	n. a.
-N= (ring) 	0.0085 	0.0076 	34 	57.55 	68.40 	55.52 	79.93 	8.83 	-3.84E-3 	4.35E-5 	-2.60E-8 	3.649 	6.528 	n. a. 	n. a.
=NH 	n. a. 	n. a. 	n. a. 	83.08 	68.91 	93.70 	119.66 	5.69 	-4.12E-3 	1.28E-4 	-8.88E-8 	n. a. 	12.169 	n. a. 	n. a.
-CN 	0.0496 	-0.0101 	91 	125.66 	59.89 	88.43 	89.22 	3.65E+1 	-7.33E-2 	1.84E-4 	-1.03E-7 	2.414 	12.851 	n. a. 	n. a.
-NO2 	0.0437 	0.0064 	91 	152.54 	127.24 	-66.57 	-16.83 	2.59E+1 	-3.74E-3 	1.29E-4 	-8.88E-8 	9.679 	16.738 	n. a. 	n. a.
-SH 	0.0031 	0.0084 	63 	63.56 	20.09 	-17.33 	-22.99 	3.53E+1 	-7.58E-2 	1.85E-4 	-1.03E-7 	2.360 	6.884 	n. a. 	n. a.
-S- (nonring) 	0.0119 	0.0049 	54 	68.78 	34.40 	41.87 	33.12 	1.96E+1 	-5.61E-3 	4.02E-5 	-2.76E-8 	4.130 	6.817 	n. a. 	n. a.
-S- (ring) 	0.0019 	0.0051 	38 	52.10 	79.93 	39.10 	27.76 	1.67E+1 	4.81E-3 	2.77E-5 	-2.11E-8 	1.557 	5.984 	n. a. 	n. a.'''

# Two views of the same records: one keyed by the first column of each row
# of `joback_data_txt`, the other keyed by the 1-based row number.
joback_groups_str_dict = {}
joback_groups_id_dict = {}

# One record per row: the 1-based row number followed by the row's columns.
JOBACK = namedtuple('JOBACK', [
    'i', 'name',
    'Tc', 'Pc', 'Vc', 'Tb', 'Tm',
    'Hform', 'Gform',
    'Cpa', 'Cpb', 'Cpc', 'Cpd',
    'Hfus', 'Hvap',
    'mua', 'mub',
])

for i, line in enumerate(joback_data_txt.split('\n')):
    # Rows are tab-separated; to_num is applied to the list of raw fields.
    parsed = to_num(line.split('\t'))
    j = JOBACK(i + 1, *parsed)
    joback_groups_str_dict[parsed[0]] = joback_groups_id_dict[j.i] = j


def smarts_fragment(catalog, rdkitmol=None, smi=None):
    r'''Fragments a molecule into a set of unique groups and counts as
    specified by the `catalog`. The molecule can either be an rdkit 
    molecule object, or a smiles string which will be parsed by rdkit.
    Returns a dictionary of groups and their counts according to the
    indexes of the catalog provided.
    
    Parameters
    ----------
    catalog : dict
Beispiel #5
0
from scipy.interpolate import interp1d
import pandas as pd

# Product of the constants `e` and `N_A`, both defined outside this view.
# NOTE(review): presumably the Faraday constant (elementary charge times the
# Avogadro constant) - confirm against the imports of `e` and `N_A`.
F = N_A * e

# Data directory: the 'Electrolytes' folder located next to this source file.
folder = os.path.join(
    os.path.dirname(__file__),
    'Electrolytes',
)

# Lookup tables keyed by CASRN; they start empty and are filled by the file
# loader that follows.
_Laliberte_Density_ParametersDict = {}
_Laliberte_Viscosity_ParametersDict = {}
_Laliberte_Heat_Capacity_ParametersDict = {}

# Do not re-implement with Pandas, as current methodology uses these dicts in each function
with open(os.path.join(folder, 'Laliberte2009.tsv')) as f:
    next(f)
    for line in f:
        values = to_num(line.split('\t'))

        _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[
            0:13]
        if c0:
            _Laliberte_Density_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "C0": c0,
                "C1": c1,
                "C2": c2,
                "C3": c3,
                "C4": c4,
                "Tmin": Tmin,
                "Tmax": Tmax,
Beispiel #6
0
                    _ECHATonnageDict[CAS].append(band)
            else:
                _ECHATonnageDict[CAS] = [band]


# EPA summed reported chemical usages. In metric tonnes/year after conversion.
# Many producers keep their data confidential.
# This was originally in terms of lb/year, but rounded to the nearest kg.
_EPACDRDict = {}
with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f:
    next(f)  # the first line of the file is skipped, not parsed
    for line in f:
        values = line.rstrip().split('\t')
        CAS, manufactured, imported, exported = to_num(values)
        # Every amount is stored as the parsed value divided by 1000.
        _EPACDRDict[CAS] = {
            label: amount / 1000.
            for label, amount in (
                ("Manufactured", manufactured),
                ("Imported", imported),
                ("Exported", exported),
            )
        }
#EPACDR_data = pd.read_csv(os.path.join(folder,'EPA Chemical Data Reporting - 2012.csv.gz'), sep='\t',
#                       index_col=0, dtype={'CASRN': np.int32, 'Domestic': np.float64, 'Imported': np.float64,
#                                          'Exported': np.float64},  compression='gzip')


# Display names of the three data sources.
EPACDR = 'EPA Chemical Data Reporting (2012)'
ECHA = 'European Chemicals Agency Total Tonnage Bands'
OECD = 'OECD high production volume chemicals'


def economic_status(CASRN, Method=None, AvailableMethods=False):  # pragma: no cover
    '''Look up the economic status of a chemical.
Beispiel #7
0
-N= (ring) 	0.0085 	0.0076 	34 	57.55 	68.40 	55.52 	79.93 	8.83 	-3.84E-3 	4.35E-5 	-2.60E-8 	3.649 	6.528 	n. a. 	n. a.
=NH 	n. a. 	n. a. 	n. a. 	83.08 	68.91 	93.70 	119.66 	5.69 	-4.12E-3 	1.28E-4 	-8.88E-8 	n. a. 	12.169 	n. a. 	n. a.
-CN 	0.0496 	-0.0101 	91 	125.66 	59.89 	88.43 	89.22 	3.65E+1 	-7.33E-2 	1.84E-4 	-1.03E-7 	2.414 	12.851 	n. a. 	n. a.
-NO2 	0.0437 	0.0064 	91 	152.54 	127.24 	-66.57 	-16.83 	2.59E+1 	-3.74E-3 	1.29E-4 	-8.88E-8 	9.679 	16.738 	n. a. 	n. a.
-SH 	0.0031 	0.0084 	63 	63.56 	20.09 	-17.33 	-22.99 	3.53E+1 	-7.58E-2 	1.85E-4 	-1.03E-7 	2.360 	6.884 	n. a. 	n. a.
-S- (nonring) 	0.0119 	0.0049 	54 	68.78 	34.40 	41.87 	33.12 	1.96E+1 	-5.61E-3 	4.02E-5 	-2.76E-8 	4.130 	6.817 	n. a. 	n. a.
-S- (ring) 	0.0019 	0.0051 	38 	52.10 	79.93 	39.10 	27.76 	1.67E+1 	4.81E-3 	2.77E-5 	-2.11E-8 	1.557 	5.984 	n. a. 	n. a.'''

# Two views of the same records: one keyed by the first column of each row
# of `joback_data_txt`, the other keyed by the 1-based row number.
joback_groups_str_dict = {}
joback_groups_id_dict = {}

# One record per row: the 1-based row number followed by the row's columns.
JOBACK = namedtuple('JOBACK', (
    'i', 'name',
    'Tc', 'Pc', 'Vc', 'Tb', 'Tm',
    'Hform', 'Gform',
    'Cpa', 'Cpb', 'Cpc', 'Cpd',
    'Hfus', 'Hvap',
    'mua', 'mub',
))

for i, line in enumerate(joback_data_txt.split('\n')):
    # Rows are tab-separated; to_num is applied to the list of raw fields.
    parsed = to_num(line.split('\t'))
    j = JOBACK(i + 1, *parsed)
    joback_groups_str_dict[parsed[0]] = joback_groups_id_dict[j.i] = j


def smarts_fragment(catalog, rdkitmol=None, smi=None):
    r'''Fragments a molecule into a set of unique groups and counts as
    specified by the `catalog`. The molecule can either be an rdkit 
    molecule object, or a smiles string which will be parsed by rdkit.
    Returns a dictionary of groups and their counts according to the
    indexes of the catalog provided.
    
    Parameters
    ----------
    catalog : dict
Beispiel #8
0
# Tab-separated table read from `folder`; its first column becomes the index.
CRC_aqueous_thermodynamics = pd.read_csv(
    os.path.join(folder, 'CRC Thermodynamic Properties of Aqueous Ions.csv'),
    index_col=0,
    sep='\t',
)

# Tab-separated table read from `folder`; keeps pandas' default integer index.
electrolyte_dissociation_reactions = pd.read_csv(
    os.path.join(folder, 'Electrolyte dissociations.csv'),
    sep='\t',
)


# Lookup tables keyed by CASRN, one per correlation; filled by the loader below.
_Laliberte_Density_ParametersDict = {}
_Laliberte_Viscosity_ParametersDict = {}
_Laliberte_Heat_Capacity_ParametersDict = {}


# Do not re-implement with Pandas, as current methodology uses these dicts in each function
with open(os.path.join(folder, 'Laliberte2009.tsv')) as f:
    next(f)  # the first line of the file is skipped, not parsed
    for line in f:
        values = to_num(line.split('\t'))

        # Columns 0-12: identification plus the density coefficients and
        # their limits.
        _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[0:13]
        # An entry is stored only when the first coefficient is truthy, so a
        # missing value (and also an exact 0) leaves the compound out.
        if c0:
            _Laliberte_Density_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "C0": c0,
                "C1": c1,
                "C2": c2,
                "C3": c3,
                "C4": c4,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }

        # Columns 13-22: viscosity coefficients. Tmin, Tmax and wMax are
        # rebound to the limits of this correlation.
        v1, v2, v3, v4, v5, v6, Tmin, Tmax, wMax, pts = values[13:23]
        if v1:
            _Laliberte_Viscosity_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "V1": v1,
                "V2": v2,
                "V3": v3,
                "V4": v4,
                "V5": v5,
                "V6": v6,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }

        # Columns 23-32: heat capacity coefficients. The slice is exactly ten
        # wide to match the ten names unpacked; the earlier bound of 34 could
        # yield eleven items and fail on a row with a trailing extra column.
        a1, a2, a3, a4, a5, a6, Tmin, Tmax, wMax, pts = values[23:33]
        if a1:
            _Laliberte_Heat_Capacity_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "A1": a1,
                "A2": a2,
                "A3": a3,
                "A4": a4,
                "A5": a5,
                "A6": a6,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }
Beispiel #9
0
# Product of the constants `e` and `N_A`, both defined outside this view.
# NOTE(review): presumably the Faraday constant (elementary charge times the
# Avogadro constant) - confirm against the imports of `e` and `N_A`.
F = N_A * e


# Data directory: the 'Electrolytes' folder located next to this source file.
folder = os.path.join(
    os.path.dirname(__file__),
    'Electrolytes',
)


# Lookup tables keyed by CASRN, one per correlation; filled by the loader below.
_Laliberte_Density_ParametersDict = {}
_Laliberte_Viscosity_ParametersDict = {}
_Laliberte_Heat_Capacity_ParametersDict = {}


# Do not re-implement with Pandas, as current methodology uses these dicts in each function
with open(os.path.join(folder, 'Laliberte2009.csv')) as f:
    next(f)  # the first line of the file is skipped, not parsed
    for line in f:
        values = to_num(line.split('\t'))

        # Columns 0-12: identification plus the density coefficients and
        # their limits.
        _name, CASRN, _formula, _MW, c0, c1, c2, c3, c4, Tmin, Tmax, wMax, pts = values[0:13]
        # An entry is stored only when the first coefficient is truthy, so a
        # missing value (and also an exact 0) leaves the compound out.
        if c0:
            _Laliberte_Density_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "C0": c0,
                "C1": c1,
                "C2": c2,
                "C3": c3,
                "C4": c4,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }

        # Columns 13-22: viscosity coefficients. Tmin, Tmax and wMax are
        # rebound to the limits of this correlation.
        v1, v2, v3, v4, v5, v6, Tmin, Tmax, wMax, pts = values[13:23]
        if v1:
            _Laliberte_Viscosity_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "V1": v1,
                "V2": v2,
                "V3": v3,
                "V4": v4,
                "V5": v5,
                "V6": v6,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }

        # Columns 23-32: heat capacity coefficients. The slice is exactly ten
        # wide to match the ten names unpacked; the earlier bound of 34 could
        # yield eleven items and fail on a row with a trailing extra column.
        a1, a2, a3, a4, a5, a6, Tmin, Tmax, wMax, pts = values[23:33]
        if a1:
            _Laliberte_Heat_Capacity_ParametersDict[CASRN] = {
                "Name": _name,
                "Formula": _formula,
                "MW": _MW,
                "A1": a1,
                "A2": a2,
                "A3": a3,
                "A4": a4,
                "A5": a5,
                "A6": a6,
                "Tmin": Tmin,
                "Tmax": Tmax,
                "wMax": wMax,
            }