def from_molecules(self, mols: Molecules):
    """Fit the language model and build the toxicity model from ``mols``.

    The table returned by ``mols.get_mols_with_passfail_labels()`` supplies
    both inputs: its index is used as the samples and its ``int``-cast
    contents as the targets.  The language model is fitted first, then a
    new ``Toxicity`` model is stored on ``self.toxicity`` and built from
    the same samples/targets.  A one-line summary with ``self.uuid`` is
    printed at the end.
    """
    labelled = mols.get_mols_with_passfail_labels()
    samples, targets = labelled.index, labelled.astype('int')

    # The language model is fitted before the toxicity model is created,
    # because the toxicity model receives it as a constructor argument.
    self.language.fit(mols.get_all_mols(), samples, targets)

    self.toxicity = Toxicity(self.hyperparams, self.language)
    self.toxicity.build(samples, targets)

    summary = f"Trained new chemistry model: {self.uuid}"
    print(summary)
def to_sql(self, sess: Session):
    """Import every generated molecule into the database behind ``sess``.

    Registers ``self.origin`` as a dataset and ``self.properties`` as
    properties, then adds each ``(smiles, props)`` pair yielded by
    ``self._generate()`` under the new dataset id, showing a progress bar
    sized from ``len(new_client.molecule)``.

    Args:
        sess: Database session the rows are written through.
    """
    source_id = add_dataset(sess, self.origin)
    add_props(sess, self.properties)
    mols = Molecules(sess)

    progress = tqdm(
        self._generate(),
        total=len(new_client.molecule),
        unit=' row',
    )
    for smiles, props in progress:
        mols.add(source_id, smiles, props)

    # The other ``to_sql`` importers finish with an explicit commit; this
    # one previously did not, so the added rows were presumably left
    # uncommitted unless a caller committed the session itself.
    # NOTE(review): confirm ``Molecules.add`` does not already commit.
    mols.commit()
def run_train_models(dbstring: str):
    """Train the language and toxicity models and save both to disk.

    Opens a session for ``dbstring``, fits a ``Language`` model on all
    molecules plus the pass/fail-labelled table, saves it to
    ``data/models/language.model``, then fits a ``Toxicity`` model on the
    same samples/targets and saves it to ``data/models/toxicity.model``.
    The ``data/models`` directory is created if it does not exist.

    Args:
        dbstring: Value handed to ``_create_sess`` to open the session.
    """
    session = _create_sess(dbstring)
    molecules = Molecules(session)
    language = Language()

    labelled = molecules.get_mols_with_passfail_labels()
    samples, targets = labelled.index, labelled.astype('int')

    language.fit(molecules.get_all_mols(), samples, targets)

    # Make sure the output directory exists before the first save.
    model_dir = Path('data/models')
    model_dir.mkdir(parents=True, exist_ok=True)
    language.save('data/models/language.model')

    toxicity = Toxicity(language)
    toxicity.fit(samples, targets)
    toxicity.save('data/models/toxicity.model')
def to_sql(self, sess: Session):
    """Insert the two fixed test molecules and commit them.

    Registers ``self.origin`` as a dataset and ``self.props`` as
    properties, then adds two hard-coded SMILES entries (one tagged
    ``Test`` in the ``Unspecific`` partition, one carrying two tags and an
    ``NR-AR`` value in the ``Verify`` partition) before committing.

    Args:
        sess: Database session the rows are written through.
    """
    source_id = add_dataset(sess, self.origin)
    add_props(sess, self.props)
    mols = Molecules(sess)

    # (smiles, properties, partition) for each fixture molecule, in the
    # order they are inserted.
    fixtures = (
        (
            'CN1CCC[C@H]1c2cccnc2',
            {'Tag': 'Test'},
            PartitionCategory.Unspecific,
        ),
        (
            'O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5',
            {'Tag': ['Test1', 'Test2'], 'NR-AR': 1.0},
            PartitionCategory.Verify,
        ),
    )
    for smiles, props, partition in fixtures:
        mols.add(source_id, smiles, props, partition)

    mols.commit()
def run_train_models(dbstring: str, hp=None) -> Chemistry:
    """Train a ``Chemistry`` model from the database and pickle it to disk.

    Opens a session for ``dbstring``, trains a new ``Chemistry`` instance
    from the molecules it exposes, and writes the pickled model to
    ``data/chemistry/<chem.uuid>`` (creating the directory if needed).

    Args:
        dbstring: Value handed to ``_create_sess`` to open the session.
        hp: Hyperparameters for the model.  When omitted (``None``), a
            fresh ``Hyperparameters()`` is created for this call.

    Returns:
        The trained ``Chemistry`` instance.
    """
    # A ``Hyperparameters()`` default in the signature is evaluated once at
    # definition time and then shared by every call; build it per call
    # instead so calls cannot observe each other's mutations.
    if hp is None:
        hp = Hyperparameters()

    sess = _create_sess(dbstring)
    mols = Molecules(sess)

    chem = Chemistry(hp)
    chem.from_molecules(mols)

    out_dir = Path('data', 'chemistry')
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(out_dir / chem.uuid, 'wb') as fd:
        pickle.dump(chem, fd)

    return chem
def to_sql(self, sess: Session):
    """Import every row of ``self.to_df()`` into the database and commit.

    Registers ``self.origin`` as a dataset and ``self.properties`` as
    properties, then adds one molecule per DataFrame row: the row's
    ``smiles`` attribute is the structure and the whole row, as a dict, is
    passed as its properties.  A progress bar tracks the rows.

    Args:
        sess: Database session the rows are written through.
    """
    source_id = add_dataset(sess, self.origin)
    add_props(sess, self.properties)
    mols = Molecules(sess)

    frame = self.to_df()
    n_rows, _n_cols = frame.shape

    progress = tqdm(frame.iterrows(), total=n_rows, unit=' row')
    for _index, record in progress:
        mols.add(source_id, record.smiles, record.to_dict())

    mols.commit()
def to_sql(self, sess: Session):
    """Import every row of ``self.to_df()`` with its split partition.

    Registers ``self.origin`` as a dataset, then adds one molecule per
    DataFrame row using the row's ``SMILES`` value and a fixed
    ``{'Tag': 'MOSES'}`` property dict.  The row's ``SPLIT`` value selects
    the partition: ``'train'`` and ``'test'`` map to the matching
    category, anything else to ``Unspecific``.  Commits at the end.

    Args:
        sess: Database session the rows are written through.
    """
    source_id = add_dataset(sess, self.origin)
    mols = Molecules(sess)

    frame = self.to_df()
    n_rows, _n_cols = frame.shape

    # Known split labels; any other value falls back to Unspecific.
    partition_by_split = {
        'train': PartitionCategory.Train,
        'test': PartitionCategory.Test,
    }

    progress = tqdm(frame.iterrows(), total=n_rows, unit=' row')
    for _index, record in progress:
        partition = partition_by_split.get(
            record.SPLIT, PartitionCategory.Unspecific
        )
        mols.add(source_id, record.SMILES, {'Tag': 'MOSES'}, partition)

    mols.commit()