def load_economic_data():
    """Load the production/tonnage tables into module-level globals, once.

    Three files are read from the 'Law' directory next to this module:

    * 'HPV 2015 March 3.csv' -> `HPV_data`, the result of `pandas.read_csv`
      with tab separation and the first column used as the index.
    * 'ECHA Tonnage Bands.csv.zip' (first archive member) ->
      `_ECHATonnageDict`, mapping each CAS string to the list of distinct
      tonnage bands, in order of first appearance.
    * 'EPA 2012 Chemical Data Reporting.csv' -> `_EPACDRDict`, mapping each
      CAS to a dict with the keys "Manufactured", "Imported" and "Exported",
      each value being the file's value divided by 1000.

    The call does nothing when `HPV_data` is already populated. The tables are
    assembled in local variables and only assigned to the globals after every
    file has been read, so an exception part-way through cannot leave
    `HPV_data` set (which would make later calls return early) while the two
    dictionaries are missing or incomplete.

    Returns
    -------
    None
    """
    global HPV_data, _EPACDRDict, _ECHATonnageDict
    if HPV_data is not None:
        return None

    # Imported lazily so that pandas is only required when this data is used.
    import pandas as pd
    import zipfile

    folder = os_path_join(os.path.dirname(__file__), 'Law')

    # Original note: OECD chemicals are those produced by OECD members
    # in > 1000 tonnes/year.
    # 13061-29-2 not valid and removed
    hpv_table = pd.read_csv(os.path.join(folder, 'HPV 2015 March 3.csv'),
                            sep='\t', index_col=0)

    tonnage_bands = {}
    with zipfile.ZipFile(os.path.join(folder, 'ECHA Tonnage Bands.csv.zip')) as z:
        with z.open(z.namelist()[0]) as f:
            # Iterate lazily instead of materialising every line up front.
            for raw_line in f:
                # Archive members are read as bytes, so decode to text first.
                CAS, band = raw_line.decode("utf-8").strip('\n').split('\t')
                bands = tonnage_bands.setdefault(CAS, [])
                if band not in bands:
                    bands.append(band)

    epa_cdr = {}
    with open(os.path.join(folder, 'EPA 2012 Chemical Data Reporting.csv')) as f:
        # Original note: EPA summed reported chemical usages, in metric
        # tonnes/year after conversion. Many producers keep their data
        # confidential. This was originally in terms of lb/year, but rounded
        # to the nearest kg.
        next(f)  # skip the first line (presumably a header row)
        for line in f:
            CAS, manufactured, imported, exported = to_num(line.rstrip().split('\t'))
            epa_cdr[CAS] = {
                "Manufactured": manufactured / 1000.,
                "Imported": imported / 1000.,
                "Exported": exported / 1000.,
            }

    # Publish all three tables together; HPV_data doubles as the
    # "already loaded" flag, so it is assigned last.
    _ECHATonnageDict = tonnage_bands
    _EPACDRDict = epa_cdr
    HPV_data = hpv_table
def load_mixture_composition():
    """Fill the module-level lookup tables of common mixtures.

    Parses 'Mixtures Compositions.tsv' from `folder`. Each data row is laid
    out as ``name, source, N`` followed by four groups of N fields (CASs,
    component names, ws, zs) and then any number of synonyms.

    After the call:

    * `common_mixtures` maps each mixture name to its
      `CommonMixtureMetadata` object.
    * `common_mixtures_by_synonym` maps every lower-cased synonym, and the
      lower-cased mixture name itself, to the same object.
    * `mixture_composition_loaded` is True.

    Original note on the data: a dict of 90 or so mixtures, their components,
    and synonyms. Small errors in mole fractions not adding to 1 are known.
    Errors in adding mass fraction are less common, present at the 5th
    decimal. Mass basis is assumed for all mixtures.
    """
    global mixture_composition_loaded, common_mixtures_by_synonym, common_mixtures
    common_mixtures = {}
    common_mixtures_by_synonym = {}

    with open(os.path.join(folder, 'Mixtures Compositions.tsv')) as handle:
        next(handle)  # skip the first line of the file
        for row in handle:
            fields = to_num(row.strip('\n').strip('\t').split('\t'))
            name, source, N = fields[0:3]
            N = int(N)

            # Walk through the four consecutive N-wide column groups.
            offset = 3
            groups = []
            for _ in range(4):
                groups.append(fields[offset:offset + N])
                offset += N
            CASs, names, ws, zs = groups

            # Whatever follows the four groups is the synonym list.
            aliases = fields[offset:]
            if aliases:
                aliases = [alias.lower() for alias in aliases]
            aliases.append(name.lower())

            metadata = CommonMixtureMetadata(
                name=name,
                CASs=CASs,
                N=N,
                source=source,
                names=names,
                ws=ws,
                zs=zs,
                synonyms=aliases,
            )
            common_mixtures[name] = metadata
            common_mixtures_by_synonym.update(dict.fromkeys(aliases, metadata))

    mixture_composition_loaded = True
self.Cpc = Cpc self.Cpd = Cpd self.Hfus = Hfus self.Hvap = Hvap self.mua = mua self.mub = mub def __repr__(self): return '''JOBACK(i=%r, name=%r, Tc=%r, Pc=%r, Vc=%r, Tb=%r, Tm=%r, Hform=%r, Gform=%r, Cpa=%r, Cpb=%r, Cpc=%r, Cpd=%r, Hfus=%r, Hvap=%r, mua=%r, mub=%r)''' % ( self.i, self.name, self.Tc, self.Pc, self.Vc, self.Tb, self.Tm, self.Hform, self.Gform, self.Cpa, self.Cpb, self.Cpc, self.Cpd, self.Hfus, self.Hvap, self.mua, self.mub) for i, line in enumerate(joback_data_txt.split('\n')): parsed = to_num(line.split('\t')) j = JOBACK(i+1, *parsed) joback_groups_str_dict[parsed[0]] = j joback_groups_id_dict[i+1] = j def smarts_fragment(catalog, rdkitmol=None, smi=None, deduplicate=True): r'''Fragments a molecule into a set of unique groups and counts as specified by the `catalog`. The molecule can either be an rdkit molecule object, or a smiles string which will be parsed by rdkit. Returns a dictionary of groups and their counts according to the indexes of the catalog provided. Parameters ---------- catalog : dict