def __getitem__(self, item):
    """Return the ``(image, descriptor_vector)`` pair for index ``item``.

    The image comes from one of three sources, tried in this order:

    1. ``self.images[item]`` when precomputed images were supplied;
    2. ``self.data_cache`` (keyed by SMILES string) when caching is enabled
       and the entry already exists;
    3. a fresh rendering via ``smiles_to_image`` of the molecule parsed from
       ``self.smiles[item]``; the untransformed rendering is stored in the
       cache when caching is enabled.

    In every case ``self.transform`` is applied to the image before it is
    returned, so cached entries are always the untransformed images.

    The descriptor vector is built from ``self.descs[item]``: when an
    imputer is configured the row is passed through ``self.imputer`` and
    then ``self.scaler``; NaN and +/-inf are replaced by 0 and the result
    is returned as a flat float tensor.
    """
    if self.images is not None:
        image = self.images[item]
    else:
        key = self.smiles[item]
        if self.cache and key in self.data_cache:
            image = self.data_cache[key]
        else:
            # NOTE(review): MolFromSmiles yields None for unparsable SMILES;
            # whether smiles_to_image tolerates that is not visible here.
            image = smiles_to_image(Chem.MolFromSmiles(key))
            if self.cache:
                # Cache the raw rendering so that (possibly random)
                # transforms are re-applied on every access.
                self.data_cache[key] = image
    image = self.transform(image)

    # Descriptor handling is identical for every image source.
    vec = self.descs[item]
    if self.imputer is not None:
        # The transformers are fed a single-row 2-D array.
        vec = self.scaler.transform(
            self.imputer.transform(vec.reshape(1, -1)))
    vec = np.nan_to_num(vec.flatten(), nan=0, posinf=0, neginf=0)
    return image, torch.from_numpy(vec).float()
def __getitem__(self, item):
    """Return the sample stored at index ``item``.

    Return shapes:

    * ``image`` alone when precomputed images are used and ``self.descs``
      is ``None``;
    * ``(image, vec, mask)`` when precomputed images are used and
      ``self.use_mask`` is set (``mask`` is ``self.mask[item]``);
    * ``(image, vec)`` otherwise.

    Image sources, in order: ``self.images[item]`` (cast to float32,
    divided by 255 and converted with ``transforms.ToPILImage``), the
    SMILES-keyed ``self.data_cache``, or a fresh ``smiles_to_image``
    rendering which is cached untransformed when caching is enabled.
    ``self.transform`` is applied to the image in every case.

    ``vec`` is ``self.descs[item]`` passed through ``self.imputer`` and
    ``self.scaler`` when an imputer is configured, with NaN and +/-inf
    replaced by 0, returned as a flat float tensor.

    Raises:
        NotImplementedError: if ``self.use_mask`` is set but no precomputed
            images are available. Masks are only returned on the
            precomputed-image path.
    """
    if self.images is not None:
        # presumably images are stored with values in 0..255 -- TODO confirm
        scaled = self.images[item].astype(np.float32) / 255.0
        image = transforms.ToPILImage()(torch.from_numpy(scaled))
        image = self.transform(image)
        if self.descs is None:
            return image
    else:
        if self.use_mask:
            # Previously `assert (False)`, which is removed under `-O` and
            # would let this path silently return unmasked samples.
            raise NotImplementedError(
                "use_mask is only supported with precomputed images")
        key = self.smiles[item]
        if self.cache and key in self.data_cache:
            image = self.data_cache[key]
        else:
            image = smiles_to_image(Chem.MolFromSmiles(key))
            if self.cache:
                # Store the raw rendering; transforms run on every access.
                self.data_cache[key] = image
        image = self.transform(image)

    # Descriptor handling is identical for every image source.
    vec = self.descs[item]
    if self.imputer is not None:
        # The transformers are fed a single-row 2-D array.
        vec = self.scaler.transform(
            self.imputer.transform(vec.reshape(1, -1)))
    vec = torch.from_numpy(
        np.nan_to_num(vec.flatten(), nan=0, posinf=0, neginf=0)).float()

    # Only reachable with use_mask set on the precomputed-image path.
    if self.use_mask:
        return image, vec, self.mask[item]
    return image, vec
def __getitem__(self, item):
    """Return the ``(image, target)`` pair for index ``item``.

    When caching is enabled and the SMILES string is already present in
    ``self.data_cache`` the stored pair is returned as is. Otherwise the
    molecule is parsed from ``self.smiles[item]``, rendered with
    ``smiles_to_image`` and scored with ``self.property_func``.

    Target conversion:

    * ``self.values == 1``: a ``None`` result is replaced by ``-1.0`` and
      the value is wrapped in a float tensor viewed as one element;
    * otherwise: the result is passed through ``np.nan_to_num`` (NaN and
      +/-inf become 0) and converted to a float tensor.

    The freshly computed pair is stored in the cache when caching is
    enabled.
    """
    key = self.smiles[item]
    if self.cache and key in self.data_cache:
        return self.data_cache[key]

    mol = Chem.MolFromSmiles(key)
    image = smiles_to_image(mol)
    # `target` rather than `property`, which would shadow the builtin.
    target = self.property_func(mol)  # TODO align property
    if self.values == 1:
        if target is None:
            target = -1.0  # placeholder for a missing scalar property
        target = torch.FloatTensor([target]).view(1)
    else:
        target = torch.from_numpy(
            np.nan_to_num(target, nan=0, posinf=0, neginf=0)).float()

    if self.cache:
        self.data_cache[key] = (image, target)
    return image, target
def get_image(self):
    """Return the image for ``self.mol``, rendering it on first access.

    The first call renders the image with ``smiles_to_image``, stores it on
    ``self.image`` and mirrors it into ``self.data['image']``. Later calls
    return the stored image without rendering again.
    """
    if self.image is not None:
        return self.image

    self.image = smiles_to_image(self.mol)
    self.data['image'] = self.image
    return self.image
import argparse
import pickle

import pandas as pd
from rdkit import Chem
from tqdm import tqdm

from features import generateFeatures


def get_args():
    """Parse the command line: ``-i`` input CSV path, ``-o`` output path."""
    parser = argparse.ArgumentParser()
    for flag in ('-i', '-o'):
        parser.add_argument(flag, type=str, required=True)
    return parser.parse_args()


def main():
    """Render every parsable SMILES of the input CSV and pickle the images.

    The first column of the header-less CSV given by ``-i`` is read as
    SMILES strings. Strings that RDKit cannot parse are skipped, so the
    output list may be shorter than the input. The resulting list of images
    is pickled to the path given by ``-o``.
    """
    args = get_args()

    table = pd.read_csv(args.i, header=None)
    smiles = list(table.iloc[:, 0])

    # Lazy generator so the tqdm bar advances as molecules are processed.
    molecules = (Chem.MolFromSmiles(smile) for smile in tqdm(smiles))
    images = [
        generateFeatures.smiles_to_image(mol)
        for mol in molecules
        if mol is not None
    ]

    with open(args.o, 'wb') as f:
        pickle.dump(images, f)


if __name__ == '__main__':
    main()
def get_image(mol):
    """Render ``mol`` and return the inverted image as a ``uint8`` array.

    Steps: render with ``generateFeatures.smiles_to_image``, invert with
    ``Invert()``, convert with ``transforms.ToTensor()``, take the NumPy
    view, multiply by 255 and cast to ``np.uint8``.
    """
    to_tensor = transforms.ToTensor()
    invert = Invert()

    rendered = generateFeatures.smiles_to_image(mol)
    tensor = to_tensor(invert(rendered))
    # presumably ToTensor yields values in [0, 1], hence the rescale to
    # 0..255 before the integer cast -- TODO confirm
    pixels = 255 * tensor.numpy()
    return pixels.astype(np.uint8)