예제 #1
0
 def from_molecules(self, mols: Molecules):
     """Fit the language model and build the toxicity model from ``mols``.

     The pass/fail-labelled molecules returned by
     ``mols.get_mols_with_passfail_labels()`` supply the training inputs
     (their index) and the targets (their values cast to ``int``).  The
     language model is fitted on all molecules together with that labelled
     subset, then a new ``Toxicity`` model is created from the stored
     hyperparameters and the fitted language model and built on the same
     inputs and targets.
     """
     labelled = mols.get_mols_with_passfail_labels()
     inputs = labelled.index
     targets = labelled.astype('int')

     every_mol = mols.get_all_mols()
     self.language.fit(every_mol, inputs, targets)

     # The toxicity model is layered on top of the freshly fitted language model.
     self.toxicity = Toxicity(self.hyperparams, self.language)
     self.toxicity.build(inputs, targets)

     print(f"Trained new chemistry model: {self.uuid}")
예제 #2
0
    def to_sql(self, sess: Session):
        """Register this dataset in ``sess`` and add every generated molecule.

        The dataset origin and its properties are registered first; then each
        ``(smiles, props)`` pair yielded by ``self._generate()`` is added to
        the molecule store under the new source id, with a progress bar sized
        by ``len(new_client.molecule)``.
        """
        source_id = add_dataset(sess, self.origin)
        add_props(sess, self.properties)
        store = Molecules(sess)

        # NOTE(review): no commit() is issued on the store in this method;
        # confirm that add() persists on its own in this revision.
        progress = tqdm(self._generate(),
                        total=len(new_client.molecule),
                        unit=' row')
        for smiles, props in progress:
            store.add(source_id, smiles, props)
예제 #3
0
def run_train_models(dbstring: str):
    """Train the language and toxicity models and save them to disk.

    A session is opened from ``dbstring``; the pass/fail-labelled molecules
    provide the inputs (their index) and the integer targets.  The fitted
    language model is written to ``data/models/language.model`` and the
    fitted toxicity model to ``data/models/toxicity.model``.
    """
    session = _create_sess(dbstring)
    molecules = Molecules(session)

    language_model = Language()
    labelled = molecules.get_mols_with_passfail_labels()
    inputs = labelled.index
    targets = labelled.astype('int')
    language_model.fit(molecules.get_all_mols(), inputs, targets)

    # Make sure the output directory exists before any model is saved.
    model_dir = Path('data/models')
    model_dir.mkdir(parents=True, exist_ok=True)

    language_model.save('data/models/language.model')

    toxicity_model = Toxicity(language_model)
    toxicity_model.fit(inputs, targets)

    toxicity_model.save('data/models/toxicity.model')
예제 #4
0
 def to_sql(self, sess: Session):
     """Register this dataset in ``sess`` and insert two fixed molecules.

     The first molecule carries a single ``Tag`` and goes to the
     ``Unspecific`` partition; the second carries a list of tags plus an
     ``NR-AR`` value and goes to the ``Verify`` partition.  The store is
     committed once both have been added.
     """
     source_id = add_dataset(sess, self.origin)
     add_props(sess, self.props)
     store = Molecules(sess)

     # (smiles, properties, partition) for each hard-coded molecule.
     fixtures = (
         (
             'CN1CCC[C@H]1c2cccnc2',
             {'Tag': 'Test'},
             PartitionCategory.Unspecific,
         ),
         (
             'O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5',
             {'Tag': ['Test1', 'Test2'], 'NR-AR': 1.0},
             PartitionCategory.Verify,
         ),
     )
     for smiles, props, partition in fixtures:
         store.add(source_id, smiles, props, partition)

     store.commit()
예제 #5
0
def run_train_models(dbstring: str, hp=None) -> Chemistry:
    """Train a ``Chemistry`` model from the database and pickle it to disk.

    Args:
        dbstring: Connection string passed to ``_create_sess``.
        hp: Hyperparameters for the model.  When omitted (``None``), a fresh
            ``Hyperparameters()`` is created for this call.

    Returns:
        The trained ``Chemistry`` instance, which is also pickled to
        ``data/chemistry/<chem.uuid>``.
    """
    # Build the default per call: a default written in the signature is
    # evaluated once at definition time and would be shared by every call.
    if hp is None:
        hp = Hyperparameters()

    sess = _create_sess(dbstring)
    mols = Molecules(sess)
    chem = Chemistry(hp)
    chem.from_molecules(mols)

    out_dir = Path('data', 'chemistry')
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(Path(out_dir, chem.uuid), 'wb') as fd:
        pickle.dump(chem, fd)

    return chem
예제 #6
0
 def to_sql(self, sess: Session):
     """Register this dataset in ``sess`` and add one molecule per table row.

     Each row of ``self.to_df()`` is added under the new source id, using
     the row's ``smiles`` attribute as the structure and the whole row
     (as a dict) as its properties.  The store is committed after the last
     row.
     """
     source_id = add_dataset(sess, self.origin)
     add_props(sess, self.properties)
     store = Molecules(sess)

     frame = self.to_df()
     n_rows, _n_cols = frame.shape
     progress = tqdm(frame.iterrows(), total=n_rows, unit=' row')
     for _label, record in progress:
         store.add(source_id, record.smiles, record.to_dict())

     store.commit()
예제 #7
0
    def to_sql(self, sess: Session):
        """Register this dataset in ``sess`` and add its rows by split.

        Each row of ``self.to_df()`` is added with the tag ``MOSES``.  Its
        ``SPLIT`` value selects the partition: ``'train'`` maps to ``Train``,
        ``'test'`` to ``Test``, and anything else to ``Unspecific``.  The
        store is committed after the last row.
        """
        source_id = add_dataset(sess, self.origin)
        store = Molecules(sess)

        # Known split names; any other value falls back to Unspecific.
        partition_by_split = {
            'train': PartitionCategory.Train,
            'test': PartitionCategory.Test,
        }

        frame = self.to_df()
        n_rows, _n_cols = frame.shape
        for _label, record in tqdm(frame.iterrows(), total=n_rows, unit=' row'):
            partition = partition_by_split.get(record.SPLIT,
                                               PartitionCategory.Unspecific)
            store.add(source_id, record.SMILES, {'Tag': 'MOSES'}, partition)

        store.commit()