예제 #1
0
def load_economic_data():
    """Populate the module-level economic lookup tables on first use.

    Reads three data files from the ``Law`` folder located next to this
    module and stores the results in module globals:

    * ``HPV_data`` -- pandas DataFrame read from the tab-separated HPV file,
      indexed by its first column.
    * ``_ECHATonnageDict`` -- maps the first column of each ECHA row to the
      list of distinct tonnage bands seen for it, in first-seen order.
    * ``_EPACDRDict`` -- maps the first column of each EPA row to a dict
      with ``"Manufactured"``, ``"Imported"`` and ``"Exported"`` entries,
      each being the file value divided by 1000.

    The function is a no-op when ``HPV_data`` has already been set.

    Returns
    -------
    None
    """
    global HPV_data, _EPACDRDict, _ECHATonnageDict
    if HPV_data is not None:
        # Already loaded; nothing to do.
        return
    # Imported lazily so the cost is only paid when the data is requested.
    import pandas as pd
    import zipfile

    folder = os_path_join(os.path.dirname(__file__), 'Law')

    # OECD HPV list: chemicals produced by OECD members in > 1000 tonnes/year.
    hpv_path = os.path.join(folder, 'HPV 2015 March 3.csv')
    HPV_data = pd.read_csv(hpv_path, sep='\t', index_col=0)
    # 13061-29-2 not valid and removed

    # The global is bound before parsing starts; the alias just keeps the
    # loop body short.
    tonnage_bands = _ECHATonnageDict = {}
    echa_path = os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')
    with zipfile.ZipFile(echa_path) as archive:
        # Only the first member of the archive is read.
        with archive.open(archive.namelist()[0]) as member:
            for raw_line in member:
                # The archive member yields bytes, so decode before splitting.
                CAS, band = raw_line.decode("utf-8").strip('\n').split('\t')
                seen_bands = tonnage_bands.setdefault(CAS, [])
                if band not in seen_bands:
                    seen_bands.append(band)

    # EPA summed reported chemical usages. In metric tonnes/year after
    # conversion. Many producers keep their data confidential.
    # This was originally in terms of lb/year, but rounded to the nearest kg.
    cdr_records = _EPACDRDict = {}
    epa_path = os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')
    with open(epa_path) as handle:
        next(handle)  # skip the first line (presumably a header row)
        for row in handle:
            columns = row.rstrip().split('\t')
            CAS, manufactured, imported, exported = to_num(columns)
            cdr_records[CAS] = {
                "Manufactured": manufactured / 1000.,
                "Imported": imported / 1000.,
                "Exported": exported / 1000.
            }
예제 #2
0
def load_mixture_composition():
    """Load the common-mixture table into the module-level lookup dicts.

    Reads ``Mixtures Compositions.tsv`` from ``folder`` and rebuilds two
    globals from scratch:

    * ``common_mixtures`` -- mixture name -> ``CommonMixtureMetadata``.
    * ``common_mixtures_by_synonym`` -- lowercased synonym (including the
      lowercased mixture name itself) -> the same metadata object.

    Each data row is laid out as ``name, source, N`` followed by four
    consecutive groups of ``N`` columns (CASs, names, ws, zs) and then any
    number of synonyms.  ``mixture_composition_loaded`` is set to True once
    the whole file has been processed.

    Returns
    -------
    None
    """
    global mixture_composition_loaded, common_mixtures_by_synonym, common_mixtures
    common_mixtures = {}
    common_mixtures_by_synonym = {}
    tsv_path = os.path.join(folder, 'Mixtures Compositions.tsv')
    with open(tsv_path) as handle:
        # Read in a dict of 90 or so mixtures, their components, and synonyms.
        #
        # Small errors in mole fractions not adding to 1 are known. Errors in
        # adding mass fraction are less common, present at the 5th decimal.
        # Mass basis is assumed for all mixtures.
        next(handle)  # skip the first line (presumably a header row)
        for row in handle:
            fields = to_num(row.strip('\n').strip('\t').split('\t'))
            name, source, N = fields[:3]
            N = int(N)
            # Offsets delimiting the four N-wide column groups after the
            # three leading columns; the last offset is where synonyms begin.
            bounds = [3 + k * N for k in range(5)]
            CASs, names, ws, zs = (
                fields[lo:hi] for lo, hi in zip(bounds, bounds[1:])
            )
            synonyms = [alias.lower() for alias in fields[bounds[4]:]]
            # The mixture is always reachable by its own lowercased name.
            synonyms.append(name.lower())
            metadata = CommonMixtureMetadata(name=name,
                                             CASs=CASs,
                                             N=N,
                                             source=source,
                                             names=names,
                                             ws=ws,
                                             zs=zs,
                                             synonyms=synonyms)
            common_mixtures[name] = metadata
            common_mixtures_by_synonym.update(dict.fromkeys(synonyms, metadata))
    mixture_composition_loaded = True
예제 #3
0
        self.Cpc = Cpc
        self.Cpd = Cpd
        self.Hfus = Hfus
        self.Hvap = Hvap
        self.mua = mua
        self.mub = mub

    def __repr__(self):
        """Return a constructor-style string listing every stored field.

        The fields are written as ``name=value`` pairs using ``repr`` of each
        value, split over two lines with the break after ``Gform``.
        """
        first_row = ('i', 'name', 'Tc', 'Pc', 'Vc', 'Tb', 'Tm', 'Hform',
                     'Gform')
        second_row = ('Cpa', 'Cpb', 'Cpc', 'Cpd', 'Hfus', 'Hvap', 'mua',
                      'mub')

        def render(attrs):
            # One "attr=repr(value)" item per attribute, comma separated.
            return ', '.join('%s=%r' % (attr, getattr(self, attr))
                             for attr in attrs)

        return 'JOBACK(' + render(first_row) + ',\n' + render(second_row) + ')'

# Build the Joback group lookup tables at import time: one JOBACK record is
# created per line of the tab-separated text in joback_data_txt, numbered
# from 1 in the order the lines appear.
for i, line in enumerate(joback_data_txt.split('\n')):
    # NOTE(review): to_num presumably converts numeric-looking fields to
    # numbers and leaves the rest as strings -- confirm against its definition.
    parsed = to_num(line.split('\t'))
    j = JOBACK(i+1, *parsed)
    # Register the same record under two keys: the row's first field and its
    # 1-based position in the table.
    joback_groups_str_dict[parsed[0]] = j
    joback_groups_id_dict[i+1] = j


def smarts_fragment(catalog, rdkitmol=None, smi=None, deduplicate=True):
    r'''Fragments a molecule into a set of unique groups and counts as
    specified by the `catalog`. The molecule can either be an rdkit
    molecule object, or a smiles string which will be parsed by rdkit.
    Returns a dictionary of groups and their counts according to the
    indexes of the catalog provided.

    Parameters
    ----------
    catalog : dict