コード例 #1
0
    def test_get_cid(self):
        """Check single and batch CID lookups against known values."""
        # Identifier string -> CID the lookup is expected to return.
        expected = {"64-17-5": 702, "141-78-6": 8857, "110-01-0": 1127}

        # One get_cid call per identifier.
        for identifier, expected_cid in expected.items():
            self.assertEqual(get_cid(identifier), expected_cid)

        # A single get_cids call; the result is indexed by identifier.
        cids = get_cids(list(expected))
        for identifier, expected_cid in expected.items():
            self.assertEqual(cids[identifier], expected_cid)
コード例 #2
0
ファイル: arctander.py プロジェクト: pyrfume/pyrfume
import pyrfume
df = pyrfume.load_data('arctander_1960/Arctander Master.xlsx')

from rdkit.Chem.rdinchi import InchiToInchiKey


def _to_inchikey(x):
    """Return InchiToInchiKey(x) when the cell text contains 'InChI=', else x unchanged."""
    # A cell whose text is 'nan' can never contain 'InChI=', so one check suffices.
    return InchiToInchiKey(x) if 'InChI=' in str(x) else x


df['InChiKey'] = df['InChiKey'].apply(_to_inchikey)

from tqdm.auto import tqdm
from pyrfume.odorants import get_cid, get_cids

# Columns tried in order for each row; the first two are passed to get_cid
# under their own name as `kind`, the remaining ones as kind='name'.
lookup_columns = ['InChiKey', 'SMILES', 'CAS', 'ChemicalName']

for index, row in tqdm(df.iterrows(), total=len(df)):
    # Disabled skip of the first rows (presumably used to resume a run -- confirm):
    #if index < 215:
    #    continue
    cid = 0
    for j, col in enumerate(lookup_columns):
        value = row[col]
        if str(value) == 'nan':
            # Missing cell: move on to the next identifier column.
            continue
        kind = col if j < 2 else 'name'
        cid = get_cid(value, kind=kind)
        if cid:
            # Stop at the first identifier that yields a truthy CID.
            break
    # If no lookup succeeded this stores the last result (or the initial 0).
    df.loc[index, 'new_CID'] = cid

# Notebook-style inspection: rows whose new_CID is null.
df[df['new_CID'].isnull()]

df.join(df[[]])

pyrfume.save_data(df, "arctander_1960/arctander.csv")

# Notebook-style inspection: shape after dropping rows without a ChemicalName.
named_rows = df.dropna(subset=["ChemicalName"])
named_rows.shape

# ChemicalName -> Description mapping, restricted to rows that have a name.
x = dict(df.dropna(subset=["ChemicalName"]).set_index("ChemicalName")["Description"])

# CID -> Description mapping over all rows.
dict(df.set_index('CID')["Description"])
コード例 #3
0
# For every row whose CID is 0, fetch its OdorDB page and scrape the CAS
# string out of the first table on that page.
for name, url_suffix in df[df['CID'] == 0]['url'].items():
    url = 'https://senselab.med.yale.edu/OdorDB/%s' % url_suffix
    # Use the response as a context manager so the connection is closed
    # after each page instead of being left open for every iteration.
    with urlopen(url) as f:
        html = f.read()
    # NOTE(review): no parser is named, so bs4 picks whichever is installed
    # and may warn; pin one explicitly once the row positions below have
    # been re-checked against it.
    soup = bs4.BeautifulSoup(html)
    table = soup.find('table')
    # The sixth <tr> of the first table is taken as the CAS row; its last
    # <span> holds the text.
    cas_row = table.find_all('tr')[5]
    cas_text = cas_row.find_all('span')[-1].text
    cas = cas_text.replace('\r\n', '').strip()
    df.loc[name, 'CAS'] = cas

# +
# Add CIDs obtained from searching the CAS string
for name, cas in df[df['CAS'].notnull()]['CAS'].items():
    # Skip empty strings; only non-empty CAS values are looked up.
    if cas:
        cid = odorants.get_cid(cas, kind='name')
        df.loc[name, 'CID'] = cid

# Fill remaining missing CIDs with 0
df.loc[:, 'CID'] = df['CID'].fillna(0)
# -

# Manual fills
df.loc['2,4,5-TRIMETHYLTHIAZOLINE', 'CID'] = 263626
df.loc['METHYLSALICYLATE', 'CID'] = 4133
df.loc['PHENYLETHYL ALCOHOL (PEA)', 'CID'] = 6054
df.loc['Perillaalcohol', 'CID'] = 10819
df.loc['Perillaaldehyde', 'CID'] = 16441
#df[df['CID']==0]

file_path = os.path.join(pyrfume.DATA, 'senselab.csv')
コード例 #4
0
# Attach the CID found for each SMILES string (the join is on df's index).
df = df.join(pd.Series(smiles_cids, name='CID'))
df.head()

from rdkit.Chem import MolFromSmiles, MolToSmiles
# Start with the index value as the SMILES; rows with CID 0 are revised below.
df['SMILES'] = df.index
p = ProgressBar(len(smiles_cids))
for i, (old, cid) in enumerate(smiles_cids.items()):
    p.animate(i, status=old)
    if cid == 0:
        # No CID was found for the original string: round-trip it through
        # RDKit and retry the lookup if that produced a different string.
        mol = MolFromSmiles(old)
        if mol is None:
            # RDKit could not parse it; an empty SMILES marks the row.
            new = ''
        else:
            new = MolToSmiles(mol, isomericSmiles=True)
            if old != new:
                cid = get_cid(new, kind='SMILES')
        df.loc[old, ['SMILES', 'CID']] = [new, cid]
# Use the total count rather than the loop variable so that an empty
# smiles_cids does not raise NameError on `i`.
p.animate(len(smiles_cids), status='Done')

# Notebook-style inspection: rows whose SMILES could not be parsed.
df[df['SMILES'] == '']

# Manual fix for the 'O=[O]=O' row. The SMILES column must receive the SMILES
# string itself; the earlier chained assignment stored the CID in both columns.
ozone_smiles = '[O-][O+]=O'
ozone_cid = get_cid(ozone_smiles, kind='SMILES')
df.loc['O=[O]=O', ['SMILES', 'CID']] = [ozone_smiles, ozone_cid]

df = df.set_index('CID').drop(['ez_smiles'], axis=1)

# Swap the 'author' and 'year' column labels.
df = df.rename(columns={'author': 'year', 'year': 'author'})

df.head()

pyrfume.save_data(df, 'thresholds/parsed_threshold_data_in_air_fixed.csv')
コード例 #5
0
# Look up a CID for every SMILES string in the raw table.
results = odorants.get_cids(
    leffingwell_data_raw['smiles'], kind='SMILES', verbose=False)

# One row per SMILES: the CID column joined with the raw data keyed by SMILES.
leffingwell_data = (
    pd.Series(results, name='CID')
    .to_frame()
    .join(leffingwell_data_raw.set_index('smiles'))
)
leffingwell_data.head()

# Diagnostic pass over the entries with CID 0: retry each lookup with the
# RDKit-regenerated SMILES and print the outcome. `results` is not modified.
for smiles in leffingwell_data[leffingwell_data['CID'] == 0].index:
    name = leffingwell_data.loc[smiles, 'chemical_name']
    mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
    else:
        # Unparseable: report it, then still attempt the lookup as-is.
        print("Bad smiles: %s" % smiles)
    cid = odorants.get_cid(smiles, kind='smiles', verbose=True)
    # Show the CID when one was found, otherwise the SMILES that was tried.
    print(name, cid if cid else smiles)

# Rebuild the joined frame from `results` and show the rows still at CID 0.
cid_column = pd.Series(results, name='CID').to_frame()
leffingwell_data = cid_column.join(leffingwell_data_raw.set_index('smiles'))
leffingwell_data[leffingwell_data['CID'] == 0]

# Same data re-indexed by chemical name for inspection.
x = leffingwell_data.reset_index().set_index('chemical_name')
#x.loc['calcium alginate', 0]
x[x['CID'] == 0].head()

file_path = os.path.join(pyrfume.DATA, 'westeros', 'westeros.csv')
leffingwell_data.to_csv(file_path)