Exemple #1
0
 def test_smiles(self):
     """Check that every populated SMILES entry in hwi-compounds.csv parses.

     Reads the tab-delimited compound table located relative to
     ``self.path`` and passes each non-empty ``smiles`` value to ``smilin``,
     failing if the parser hands back ``None``.
     """
     with open('%s/../data/hwi-compounds.csv' % self.path, 'rb') as fh:
         reader = csv.DictReader(fh, delimiter="\t")
         for row in reader:
             smiles = row['smiles']
             # Rows without a SMILES value (empty, or None on a short row)
             # are skipped rather than treated as failures.
             if not smiles:
                 continue
             mol = smilin(smiles)
             # Identity comparison: ``!= None`` can be defeated by a custom
             # __ne__/__eq__ on the returned object.
             assert mol is not None, "smilin returned None for %r" % smiles
Exemple #2
0
    def _set_summary_stats(self, path):
        """
        Set summary data for each compound (ex. mw,density,smiles).

        Reads the tab-delimited file at ``path``, indexes its rows by the
        lower-cased ``name`` column, and then walks every component of every
        cocktail in ``self.cocktails``:

        * ``molecular_weight`` and ``density`` are set as floats, or to
          ``None`` when the column is absent or empty.
        * ``smiles`` is copied when present and is run through ``smilin`` as
          a validity check. A parse failure is only logged; the attribute
          keeps the raw string.

        Compounds with no matching row or no SMILES value are reported via
        ``logger.info`` and otherwise left untouched.

        """
        cols = ['molecular_weight', 'density']
        data = {}
        with open(path, 'rb') as csvfile:
            reader = csv.DictReader(csvfile, delimiter="\t")
            for row in reader:
                data[row['name'].lower()] = row

        for ck in self.cocktails:
            for cp in ck.components:
                # The index is keyed by lower-cased names, so the lookup has
                # to be lower-cased too or mixed-case names never match.
                lookup = cp.name.lower() if cp.name else cp.name
                row = data.get(lookup)
                if row is None:
                    logger.info("Missing summary data for compound: %s", cp.name)
                    continue

                for key in cols:
                    # .get() + truthiness covers a missing column, an empty
                    # cell, and the None that DictReader uses for short rows.
                    value = row.get(key)
                    setattr(cp, key, float(value) if value else None)

                smiles = row.get('smiles')
                if not smiles:
                    logger.info("Missing smiles data for compound: %s", cp.name)
                    continue

                cp.smiles = smiles
                try:
                    # Parsed only to validate; the result is not kept.
                    smilin(smiles)
                except Exception:
                    logger.info("Invalid smiles format, failed to parse smiles for compound: %s", cp.name)
Exemple #3
0
    def mol(self):
        """Return the molecule parsed from ``self.smiles``, or ``None``.

        Results are memoised in the module-level ``_mol_cache`` keyed by the
        SMILES string, so each distinct string is parsed at most once while
        parsing succeeds. ``None`` is returned when there is no SMILES string
        or when ``smilin`` raises; the failure is logged at critical level
        and is not cached.
        """
        if self.smiles in _mol_cache:
            return _mol_cache[self.smiles]

        if self.smiles is None:
            return None

        try:
            mol = smilin(self.smiles)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            logger.critical("Invalid smiles format, failed to parse smiles for compound: %s", self.name)
            return None

        _mol_cache[self.smiles] = mol
        return mol
Exemple #4
0
    def test_smilein(self):
        smiles_strings = []
        with open("{}/smiles.txt".format(self.path), 'rb') as fh:
            for line in fh:
                smiles_strings.append(line.strip())

        for smile in smiles_strings:
            mol = smilin(smile)
            out = mol.arbsmiles()
            can = mol.cansmiles()
            for bond in mol.bonds:
                print bond.symbol, bond.bondorder, bond.bondtype, bond.fixed
            for atom in mol.atoms:
                print atom, atom.sumBondOrders()
                
            print smile, out, can
Exemple #5
0
    def test_ecfp(self):
        """Test Butyramide (example used in ECFP paper)

        Builds the molecule once and checks the number of entries ``ecfp``
        returns for radii 0 through 3 (labelled ECFP_0, ECFP_2, ECFP_4 and
        ECFP_6 respectively).
        """
        butyramide = smilin("CCCC(=O)N")

        # (radius, expected fingerprint size), in increasing radius order:
        # ECFP_0, ECFP_2, ECFP_4, ECFP_6.
        expected_sizes = (
            (0, 5),
            (1, 11),
            (2, 14),
            (3, 14),
        )

        for radius, size in expected_sizes:
            fp = ecfp(butyramide, radius=radius)
            assert len(fp) == size
Exemple #6
0
def molecule_embedding(filename, radius=3, n_bits=1024):
    """Build fixed-length binary fingerprints for the molecules in a spreadsheet.

    The spreadsheet is read with ``pd.read_excel`` and must provide
    ``MOLENAME`` and ``SMILES`` columns. Each SMILES string is parsed with
    ``smilin`` and fingerprinted with ``ecfp``; every key of the resulting
    fingerprint is folded into a bit vector by taking it modulo ``n_bits``.

    Args:
        filename: Path of the Excel file to read.
        radius: Radius passed to ``ecfp`` (default 3, as before).
        n_bits: Length of each returned bit vector (default 1024, as before).

    Returns:
        A list of fingerprints, each a list of ``n_bits`` ints (0 or 1).
        There is one entry per distinct ``MOLENAME`` that was processed
        successfully; when a name repeats, the later row replaces the
        earlier one. Rows that fail to parse or fingerprint are reported on
        stdout and skipped.

    Raises:
        ValueError: If ``n_bits`` is not a positive integer.
    """
    if n_bits <= 0:
        # Fail early instead of reporting every row as a bad fingerprint
        # because of a modulo-by-zero inside the loop.
        raise ValueError("n_bits must be a positive integer")

    fingerprints = {}
    df = pd.read_excel(filename)
    for mol_id, smile in zip(df.MOLENAME, df.SMILES):
        try:
            mol = smilin(smile)
        except Exception:
            print('Bad mol for', mol_id)
            continue

        try:
            fp = ecfp(mol, radius=radius)
            fingerprint = [0] * n_bits
            for nbrhood_hash in fp.keys():
                # Python's % is non-negative for a positive modulus, so this
                # is always a valid index.
                fingerprint[nbrhood_hash % n_bits] = 1
        except Exception:
            print('Bad ecfp for', mol_id)
            continue

        fingerprints[mol_id] = fingerprint

    return list(fingerprints.values())