def get_ready_result(is_ready):
    """Yield a ``ForwardResult`` for every pending result listed in *is_ready*.

    NOTE(review): ``self`` is not a parameter here; it is resolved from the
    surrounding scope, so this is presumably a closure that gets attached to
    the instance -- confirm against the enclosing definition.

    Args:
        is_ready: Indices into ``self.pending_results`` to collect.

    Yields:
        Tuples ``(result, is_ready)`` where ``result`` is the ``ForwardResult``
        rebuilt from one pending entry and ``is_ready`` is the index list that
        was passed in (handed back unchanged).
    """
    # Keys copied from each outcome dict into the ForwardProduct constructor.
    product_fields = ('smiles_list', 'smiles', 'template_ids',
                      'num_examples', 'edits')
    for index in is_ready:
        # Fetch the payload (waiting at most 0.2 s), then release the entry.
        (smiles, outcomes) = self.pending_results[index].get(timeout=0.2)
        self.pending_results[index].forget()

        collected = ForwardResult(smiles)
        for outcome in outcomes:
            product_kwargs = {key: outcome[key] for key in product_fields}
            collected.add_product(ForwardProduct(**product_kwargs))
        yield collected, is_ready
def get_candidate_edits(self, smiles):
    """Gather the products of all pending expansions for *smiles*.

    Polls ``self.pending_results`` until ``self.waiting_for_results()`` is
    false, merging every product whose SMILES is not one of the (unmapped)
    reactant fragments into a single ``ForwardResult``.

    Args:
        smiles (str): SMILES string of the reactants; fragments are
            separated by ``'.'``.

    Returns:
        tuple: ``(all_results, candidate_edits)`` where ``all_results`` is a
        list of ``product.as_dict()`` values and ``candidate_edits`` is a
        list of ``(product.get_smiles(), product.get_edits())`` pairs.
    """
    all_results = []
    candidate_edits = []
    stitched_result = ForwardResult(smiles)

    # Remove atom-map numbers so reactant fragments compare as plain SMILES.
    reactant_mol = Chem.MolFromSmiles(smiles)
    for atom in reactant_mol.GetAtoms():
        atom.ClearProp('molAtomMapNumber')
    split_smiles = Chem.MolToSmiles(reactant_mol).split('.')
    print('SPLIT SMILES FOR GET_CANDIDATE_EDITS: {}'.format(split_smiles))

    is_ready = [
        i for (i, res) in enumerate(self.pending_results) if res.ready()
    ]
    while self.waiting_for_results():
        try:
            for result, is_ready in self.get_ready_result(is_ready):
                for product in result.products:
                    # Skip "products" that are just unreacted starting material.
                    if product.smiles not in split_smiles:
                        stitched_result.add_product(product)
            time.sleep(0.5)
            # Drop the entries that were just consumed, then look for new ones.
            finished = set(is_ready)
            self.pending_results = [
                res for (i, res) in enumerate(self.pending_results)
                if i not in finished
            ]
            is_ready = [
                i for (i, res) in enumerate(self.pending_results)
                if res.ready()
            ]
        except Exception as e:
            # Best-effort polling: report the problem and try again.
            # NOTE(review): on failure neither the sleep nor the refresh of
            # ``is_ready`` runs, so a persistent error retries immediately.
            print(e)

    for product in stitched_result.products:
        all_results.append(product.as_dict())
        candidate_edits.append((product.get_smiles(), product.get_edits()))
    self.stop_expansion()
    return (all_results, candidate_edits)
def test_batch(ft, smiles, template_count, size):
    """Check batched template application against a pickled reference.

    Runs ``ft.get_outcomes`` over consecutive windows of *size* templates,
    merges the products, and compares them with the result stored in
    ``expected/<size>.pkl`` next to this file.

    Args:
        ft: Transformer object providing ``get_outcomes``.
        smiles: Reactant SMILES passed to every batch.
        template_count: Total number of templates to cover.
        size: Number of templates per batch.

    Returns:
        bool: True when both product lists have equal length and every pair
        of products has equal ``as_dict()`` output, otherwise False.
    """
    batch_outcomes = [
        ft.get_outcomes(smiles, 100,
                        start_at=offset,
                        end_at=offset + size,
                        template_prioritization=gc.popularity)
        for offset in range(0, template_count, size)
    ]

    unique_res = ForwardResult(smiles)
    for _, batch_result in batch_outcomes:
        unique_res.add_products(batch_result.products)

    expected_path = os.path.join(os.path.dirname(__file__),
                                 'expected/' + str(size) + '.pkl')
    with open(expected_path, 'rb') as f:
        expected = pickle.load(f).get_products()
    result = unique_res.get_products()

    if len(result) != len(expected):
        return False
    # Element-wise comparison in order; any mismatch fails the check.
    mismatch = any(
        want.as_dict() != got.as_dict()
        for want, got in zip(expected, result)
    )
    return not mismatch
def get_outcomes(self, smiles, mincount, template_prioritization,
                 start_at=-1, end_at=-1, singleonly=True, stop_if=False,
                 template_count=10000, max_cum_prob=1.0):
    """Performs a one-step synthesis reaction for a given SMILES string.

    Each product collected is whatever ``self.apply_one_template`` returns
    (``ForwardProduct``-like objects exposing ``smiles_list``, ``smiles``,
    ``edits``, ``template_ids`` and ``num_examples``).

    Args:
        smiles (str): SMILES string of the reactant(s).
        mincount (int): Popularity threshold; only templates whose
            ``'count'`` is strictly greater than this are applied.
        template_prioritization: Identifier of the method used to order
            templates; ``gc.popularity`` uses ``self.templates`` as loaded.
        start_at (int, optional): Index of first prioritized template to
            use. Negative values mean "from the beginning".
            (default: {-1})
        end_at (int, optional): Index of prioritized template to stop
            before; -1 means "to the end". (default: {-1})
        singleonly (bool, optional): Whether to reduce each product to the
            largest (longest) one. (default: {True})
        stop_if (bool or string, optional): SMILES string of molecule to
            stop at if found, or False for no target. (default: {False})
        template_count (int, optional): Maximum number of templates to use.
            (default: {10000})
        max_cum_prob (float, optional): Maximum cumulative probability of
            all templates used. (default: {1.0})

    Returns:
        tuple: ``(smiles, result)`` where ``smiles`` is the SMILES
        regenerated from the parsed molecule and ``result`` is a list of
        product dicts when ``self.celery`` is truthy, otherwise a
        ``ForwardResult``.
    """
    self.get_template_prioritizers(template_prioritization)
    # Get sorted by popularity during loading.
    if template_prioritization == gc.popularity:
        prioritized_templates = self.templates
    else:
        self.template_prioritizer.set_max_templates(template_count)
        self.template_prioritizer.set_max_cum_prob(max_cum_prob)
        prioritized_templates = self.template_prioritizer.get_priority(
            (self.templates, smiles))

    self.mincount = mincount
    self.start_at = start_at
    self.singleonly = singleonly
    self.stop_if = stop_if
    if end_at == -1 or end_at >= len(self.templates):
        self.end_at = len(self.templates)
    else:
        self.end_at = end_at

    # Define mol to operate on
    mol = Chem.MolFromSmiles(smiles)
    clean_reactant_mapping(mol)
    # Label every atom with a 1-based isotope value
    # (presumably so atoms can be tracked through the templates -- confirm).
    for (atom_index, atom) in enumerate(mol.GetAtoms()):
        atom.SetIsotope(atom_index + 1)
    smiles = Chem.MolToSmiles(
        mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize

    # Initialize results object
    if self.celery:
        result = []
    else:
        result = ForwardResult(smiles)

    # Only use templates between the specified boundaries. A negative start
    # (the -1 default) is clamped to 0 so it cannot wrap around to the last
    # template, and the stop index is bounded by the prioritized list, which
    # may be shorter than self.templates.
    first = max(self.start_at, 0)
    last = min(self.end_at, len(prioritized_templates))
    for i in range(first, last):
        template = prioritized_templates[i]
        if template['count'] > mincount:
            products = self.apply_one_template(mol, smiles, template,
                                               singleonly=singleonly,
                                               stop_if=stop_if)
            if self.celery:
                # Plain dicts instead of project objects in celery mode.
                for product in products:
                    result.append({
                        'smiles_list': product.smiles_list,
                        'smiles': product.smiles,
                        'edits': product.edits,
                        'template_ids': product.template_ids,
                        'num_examples': product.num_examples
                    })
            else:
                result.add_products(products)

    return (smiles, result)
return results if __name__ == '__main__': MyLogger.initialize_logFile() ft = ForwardTransformer(mincount=10) ft.load() template_count = ft.template_count() smiles = 'NC(=O)[C@H](CCC=O)N1C(=O)c2ccccc2C1=O' for batch_size in range(100, 1000, 100): print() print(batch_size) outcomes = [] i = 0 for start_at in range(0, template_count, batch_size): i += 1 outcomes.append( ft.get_outcomes(smiles, 100, start_at=start_at, end_at=start_at + batch_size, template_prioritization=gc.popularity)) print('Ran {} batches of {} templates'.format(i, batch_size)) unique_res = ForwardResult(smiles) for smiles, result in outcomes: unique_res.add_products(result.products) print(len(unique_res.products))
def get_outcomes(self, smiles, mincount, template_prioritization,
                 start_at=-1, end_at=-1, singleonly=True, stop_if=False,
                 template_count=10000, max_cum_prob=1.0):
    '''
    Apply the prioritized templates to *smiles* and collect the products.

    Each product comes from self.apply_one_template and exposes
    smiles_list, smiles, edits, template_ids and num_examples.

    smiles: SMILES string of the reactant(s).
    mincount: only templates with 'count' strictly greater than this run.
    template_prioritization: ordering method; gc.popularity uses
        self.templates as-is, anything else goes through
        self.template_prioritizer.
    start_at / end_at: half-open window of template indices; a negative
        start_at means "from the beginning", end_at == -1 means "to the end".
    singleonly, stop_if: forwarded unchanged to self.apply_one_template.
    template_count, max_cum_prob: limits handed to the prioritizer.

    Returns (smiles, result): the SMILES regenerated from the parsed
    molecule, and either a list of product dicts (self.celery truthy) or a
    ForwardResult.
    '''
    self.get_template_prioritizers(template_prioritization)
    # Get sorted by popularity during loading.
    if template_prioritization == gc.popularity:
        prioritized_templates = self.templates
    else:
        self.template_prioritizer.set_max_templates(template_count)
        self.template_prioritizer.set_max_cum_prob(max_cum_prob)
        prioritized_templates = self.template_prioritizer.get_priority(
            (self.templates, smiles))

    self.mincount = mincount
    self.start_at = start_at
    self.singleonly = singleonly
    self.stop_if = stop_if
    if end_at == -1 or end_at >= len(self.templates):
        self.end_at = len(self.templates)
    else:
        self.end_at = end_at

    # Define mol to operate on
    mol = Chem.MolFromSmiles(smiles)
    clean_reactant_mapping(mol)
    smiles = Chem.MolToSmiles(
        mol, isomericSmiles=USE_STEREOCHEMISTRY)  # to canonicalize

    # Initialize results object
    if self.celery:
        result = []
    else:
        result = ForwardResult(smiles)

    # Only use templates between the specified boundaries. Clamp a negative
    # start (the -1 default) to 0 so index -1 does not wrap to the last
    # template, and never run past the end of the prioritized list.
    lower = max(self.start_at, 0)
    upper = min(self.end_at, len(prioritized_templates))
    for i in range(lower, upper):
        template = prioritized_templates[i]
        if template['count'] > mincount:
            products = self.apply_one_template(mol, smiles, template,
                                               singleonly=singleonly,
                                               stop_if=stop_if)
            if self.celery:
                # Plain dicts instead of project objects in celery mode.
                for product in products:
                    result.append({
                        'smiles_list': product.smiles_list,
                        'smiles': product.smiles,
                        'edits': product.edits,
                        'template_ids': product.template_ids,
                        'num_examples': product.num_examples
                    })
            else:
                result.add_products(products)

    return (smiles, result)