Beispiel #1
0
    def runSA(self, ifn, ofn, error_ofn, restart=True, startline=0):
        """Score every SMILES in ``ifn`` and write the results to ``ofn``.

        Each input line is stripped of trailing whitespace and treated as one
        SMILES string.  A successful score is written to ``ofn`` as a
        tab-separated ``smiles, score`` line; any exception raised while
        parsing or scoring is written to ``error_ofn`` as a tab-separated
        ``smiles, exception args`` line.

        Args:
            ifn: Path of the input file, one SMILES per line.
            ofn: Path of the score output file.
            error_ofn: Path of the error log file.
            restart: When exactly ``True`` both output files are truncated
                and every input line is processed.  Any other value appends
                to the existing files and skips the first ``startline``
                input lines.
            startline: Index of the first input line to process when not
                starting from scratch.
        """
        import sys

        fresh_run = restart is True
        mode = 'w' if fresh_run else 'a'

        # Silence the warnings only for the duration of this call; the
        # previous stream is restored afterwards so the rest of the process
        # keeps its stderr and the /dev/null handle is not leaked.
        saved_stderr = sys.stderr
        devnull = open('/dev/null', 'w')
        sys.stderr = devnull
        try:
            with open(error_ofn, mode) as err_ofs, \
                    open(ofn, mode) as ofs, \
                    open(ifn, 'r') as ifs:
                smis = [line.rstrip() for line in ifs]
                if not fresh_run:
                    smis = smis[startline:]
                for smi in smis:
                    try:
                        m = Chem.MolFromSmiles(smi)
                        sa = sascorer.calculateScore(m)
                        ofs.write("%s\t%f\n" % (smi, sa))
                    except Exception as inst:
                        # Best effort: record the failure and keep going.
                        err_ofs.write("%s\t%s\n" % (smi, inst.args))
        finally:
            sys.stderr = saved_stderr
            devnull.close()
Beispiel #2
0
    def runSA(self, ifn, ofn, error_ofn, restart=True, startline=0):
        """Score every SMILES in ``ifn`` and write the results to ``ofn``.

        Each input line is stripped of trailing whitespace and treated as one
        SMILES string.  A successful score is written to ``ofn`` as a
        tab-separated ``smiles, score`` line; any exception raised while
        parsing or scoring is written to ``error_ofn`` as a tab-separated
        ``smiles, exception args`` line.

        Args:
            ifn: Path of the input file, one SMILES per line.
            ofn: Path of the score output file.
            error_ofn: Path of the error log file.
            restart: When exactly ``True`` both output files are truncated
                and every input line is processed.  Any other value appends
                to the existing files and skips the first ``startline``
                input lines.
            startline: Index of the first input line to process when not
                starting from scratch.
        """
        import sys

        fresh_run = restart is True
        mode = 'w' if fresh_run else 'a'

        # Silence the warnings only for the duration of this call; the
        # previous stream is restored afterwards so the rest of the process
        # keeps its stderr and the /dev/null handle is not leaked.
        saved_stderr = sys.stderr
        devnull = open('/dev/null', 'w')
        sys.stderr = devnull
        try:
            with open(error_ofn, mode) as err_ofs, \
                    open(ofn, mode) as ofs, \
                    open(ifn, 'r') as ifs:
                smis = [line.rstrip() for line in ifs]
                if not fresh_run:
                    smis = smis[startline:]
                for smi in smis:
                    try:
                        m = Chem.MolFromSmiles(smi)
                        sa = sascorer.calculateScore(m)
                        ofs.write("%s\t%f\n" % (smi, sa))
                    except Exception as inst:
                        # Best effort: record the failure and keep going.
                        err_ofs.write("%s\t%s\n" % (smi, inst.args))
        finally:
            sys.stderr = saved_stderr
            devnull.close()
Beispiel #3
0
def logP_score(s):
    """Return the normalised combined score for the SMILES string ``s``.

    The result is the sum of three standardised terms: the negated SA score,
    ``Descriptors.MolLogP`` and the negated large-ring penalty (how far the
    largest ring exceeds six atoms).  The mean/std constants (``SA_mean``,
    ``SA_std``, ``logP_mean``, ``logP_std``, ``cycle_mean``, ``cycle_std``)
    are read from the enclosing scope.
    """
    mol = Chem.MolFromSmiles(s)
    logp = Descriptors.MolLogP(mol)
    neg_sa = -sascorer.calculateScore(mol)

    # RDKit ring info is used here so networkx is not required.
    rings = mol.GetRingInfo().AtomRings()
    largest_ring = max([len(ring) for ring in rings]) if len(rings) != 0 else 0
    ring_excess = largest_ring - 6 if largest_ring > 6 else 0
    cycle_score = -ring_excess

    z_sa = (neg_sa - SA_mean) / SA_std
    z_logp = (logp - logP_mean) / logP_std
    z_cycle = (cycle_score - cycle_mean) / cycle_std
    return z_sa + z_logp + z_cycle
Beispiel #4
0
def penalized_logp(s):
    """Return the normalised penalised logP of the SMILES string ``s``.

    Returns ``-100.0`` when ``s`` is ``None`` or cannot be parsed into a
    molecule.  Otherwise the result is the sum of the standardised logP,
    the standardised negated SA score and the standardised negated
    large-ring penalty (largest cycle-basis cycle beyond six atoms).
    """
    mol = None if s is None else Chem.MolFromSmiles(s)
    if mol is None:
        return -100.0

    # Fixed standardisation constants (mean, std) for each term.
    logP_mean, logP_std = 2.4570953396190123, 1.434324401111988
    SA_mean, SA_std = -3.0525811293166134, 0.8335207024513095
    cycle_mean, cycle_std = -0.0485696876403053, 0.2860212110245455

    log_p = Descriptors.MolLogP(mol)
    neg_sa = -sascorer.calculateScore(mol)

    # Cycle term: size of the largest cycle in a cycle basis of the
    # molecular graph, counted only beyond six atoms.
    adjacency = Chem.rdmolops.GetAdjacencyMatrix(mol)
    basis = nx.cycle_basis(nx.Graph(adjacency))
    largest_cycle = max([len(cycle) for cycle in basis]) if basis else 0
    excess = largest_cycle - 6 if largest_cycle > 6 else 0
    cycle_score = -excess

    z_log_p = (log_p - logP_mean) / logP_std
    z_sa = (neg_sa - SA_mean) / SA_std
    z_cycle = (cycle_score - cycle_mean) / cycle_std
    return z_log_p + z_sa + z_cycle
Beispiel #5
0
def gaussion_workers(chem_model, val):
    """Worker loop: receive tasks from rank 0, score one generated compound, reply.

    For every message tagged ``START`` the task's state and second element are
    passed to ``chem_kn_simulation``; the first resulting compound is scored
    and the list ``[score, smiles, rank]`` is sent back to rank 0 with tag
    ``DONE``.  A message tagged ``EXIT`` aborts the whole MPI job.

    Note: this block uses a Python 2 ``print`` statement.
    """
    while True:
        simulation_time = time.time()
        # Block until the master (rank 0) sends something; the tag decides
        # what to do with it.
        task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
        tag = status.Get_tag()
        if tag == START:
            state = task[0]
            m = task[1]
            all_posible = chem_kn_simulation(chem_model, state, val, m)
            generate_smile = predict_smile(all_posible, val)
            new_compound = make_input_smile(generate_smile)
            score = []
            kao = []  # never read in this function

            # NOTE: ``m`` is rebound here from task[1] to the parsed molecule.
            try:
                m = Chem.MolFromSmiles(str(new_compound[0]))
            except:
                m = None
            #if m!=None and len(task[i])<=81:
            if m != None:
                try:
                    logp = Descriptors.MolLogP(m)
                except:
                    logp = -1000
                # The SMILES is parsed again for the SA score and for the
                # adjacency matrix instead of reusing ``m``.
                SA_score = -sascorer.calculateScore(
                    MolFromSmiles(new_compound[0]))
                cycle_list = nx.cycle_basis(
                    nx.Graph(
                        rdmolops.GetAdjacencyMatrix(
                            MolFromSmiles(new_compound[0]))))
                # Ring penalty: size of the largest basis cycle beyond six.
                if len(cycle_list) == 0:
                    cycle_length = 0
                else:
                    cycle_length = max([len(j) for j in cycle_list])
                if cycle_length <= 6:
                    cycle_length = 0
                else:
                    cycle_length = cycle_length - 6
                cycle_score = -cycle_length
                #print cycle_score
                #print SA_score
                #print logp
                # Standardise each term with constants read from the
                # enclosing scope, then sum them.
                SA_score_norm = (SA_score - SA_mean) / SA_std
                logp_norm = (logp - logP_mean) / logP_std
                cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
                score_one = SA_score_norm + logp_norm + cycle_score_norm
                score.append(score_one)

            else:
                # Unparseable compound: sentinel score.
                score.append(-1000)
            score.append(new_compound[0])
            score.append(rank)

            comm.send(score, dest=0, tag=DONE)
            simulation_fi_time = time.time() - simulation_time
            print "simulation_fi_time:", simulation_fi_time
        if tag == EXIT:
            MPI.Abort(MPI.COMM_WORLD)

    # NOTE(review): unreachable as written -- the ``while True`` loop above
    # contains no ``break`` or ``return``.
    comm.send(None, dest=0, tag=EXIT)
Beispiel #6
0
 def _reward(self):
     """Return the (negated, discounted) loss of the current state's SA score.

     If ``self._state`` does not parse into a molecule the result is
     ``-self.loss_fn(self.target_sas)`` with no discount applied.  Otherwise
     the loss of the deviation from ``self.target_sas`` is negated and
     multiplied by ``discount_factor ** (max_steps - num_steps_taken)``.
     """
     mol = Chem.MolFromSmiles(self._state)
     if mol is None:
         return -self.loss_fn(self.target_sas)

     deviation = sascorer.calculateScore(mol) - self.target_sas
     penalty = -self.loss_fn(deviation)
     steps_remaining = self.max_steps - self.num_steps_taken
     return penalty * (self.discount_factor**steps_remaining)
Beispiel #7
0
def check_node_type(new_compound):
    """Filter candidate SMILES and score the survivors with ``rdock_score``.

    A candidate is kept when it parses into a molecule, the largest cycle in
    a cycle basis of its molecular graph has at most six atoms, and its
    ``rdock_score`` is below ``10**10``.

    Args:
        new_compound: Sequence of SMILES strings.

    Returns:
        A tuple ``(node_index, score, valid_compound)`` of parallel lists:
        the positions of the kept candidates in ``new_compound``, their
        ``rdock_score`` values and the SMILES strings themselves.
    """
    node_index = []
    valid_compound = []
    score = []

    for i, smiles in enumerate(new_compound):
        # Parse once and reuse the molecule for the ring analysis.
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            continue

        cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
        largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
        if largest_cycle > 6:
            # Candidates containing a large ring are rejected outright.
            continue

        m = rdock_score(smiles)
        if m < 10**10:
            node_index.append(i)
            valid_compound.append(smiles)
            score.append(m)

    return node_index, score, valid_compound
def calc_sa_score_mol(mol, verbose=False):
    """Return the synthetic accessibility score of ``mol``.

    Args:
        mol: Molecule object to score, or ``None``.
        verbose: When true, print a message if ``mol`` is ``None``.

    Returns:
        The value of ``sascorer.calculateScore(mol)``, or ``None`` when
        ``mol`` is ``None``.
    """
    if mol is None:
        if verbose:
            # The original message referenced an undefined name ``smi`` and
            # raised NameError; report the argument that was received.
            print("Error passing: %s" % mol)
        return None
    # Synthetic accessibility score
    return sascorer.calculateScore(mol)
Beispiel #9
0
def calc_score(smiles):
    """Return the normalised combined score of ``smiles``, or ``-1e10``.

    ``-1e10`` is returned when ``verify_sequence`` rejects the input, when
    the molecular weight exceeds 500, or when any step of the calculation
    raises.  Otherwise the result is the sum of the standardised negated SA
    score, logP and negated large-ring penalty, using the mean/std constants
    read from the enclosing scope.
    """
    failure = -1e10
    if not verify_sequence(smiles):
        return failure

    try:
        mol = MolFromSmiles(smiles)
        if Descriptors.MolWt(mol) > 500:
            return failure

        log_p = Descriptors.MolLogP(mol)
        neg_sa = -sascorer.calculateScore(mol)

        # Ring penalty: largest cycle-basis cycle, counted beyond six atoms.
        basis = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
        largest = max([len(cycle) for cycle in basis]) if basis else 0
        excess = largest - 6 if largest > 6 else 0
        cycle_term = -excess

        z_sa = (neg_sa - SA_mean) / SA_std
        z_log_p = (log_p - logP_mean) / logP_std
        z_cycle = (cycle_term - cycle_mean) / cycle_std
        return z_sa + z_log_p + z_cycle
    except Exception:
        return failure
Beispiel #10
0
def compute_cost_logp(G, writefile="temp.txt"):
    """Compute the SA, logP and ring terms for the molecule described by ``G``.

    ``guess_correct_molecules_from_graph`` is called with ``G`` and
    ``writefile``; when it succeeds the molecule is read back from
    ``writefile`` as a Mol2 file and the three terms are computed.

    Returns:
        The tuple ``(sas, logP, cycle_score)``.  Each element is still the
        empty string ``""`` when the molecule could not be produced or read.

    Note: this block uses Python 2 ``print`` statements.
    """
    cost = 0.0
    sas = ""
    logP = ""
    cycle_score = ""
    #m1 = nx_to_mol(G)
    if guess_correct_molecules_from_graph(G, writefile):
        m1 = Chem.MolFromMol2File(writefile)
        if m1 != None:
            s = Chem.MolToSmiles(m1)  # not used afterwards
            sas = -sascorer.calculateScore(m1)
            logP = Descriptors.MolLogP(m1)
            # The cycle basis is taken on the input graph itself, not on m1.
            cycle_list = nx.cycle_basis(G)

            # Ring penalty: size of the largest basis cycle beyond six.
            if len(cycle_list) == 0:
                cycle_length = 0
            else:
                cycle_length = max([len(j) for j in cycle_list])
            if cycle_length <= 6:
                cycle_length = 0.0
            else:
                cycle_length = cycle_length - 6.0
            cycle_score = -cycle_length
            cost = sas + logP + cycle_score
        else:
            print "Error: m1 is NONE"
            cost = ""
    else:
        print "Error: gues correct molecule"
        cost = ""
    # we want to define this property value such that low values are better
    if cost != "":
        cost = 10.00 - cost
    # NOTE(review): ``cost`` is computed above but is not part of the return
    # value -- confirm whether callers expect it.
    return (sas, logP, cycle_score)
Beispiel #11
0
def calc_score(smiles):
    """Return the normalised combined score of ``smiles``.

    Each term (negated SA score, logP, negated large-ring penalty) is
    standardised with the mean and standard deviation of the corresponding
    reference collection (``SA_scores``, ``logP_values``, ``cycle_scores``)
    read from the enclosing scope, and the three are summed.

    Raises:
        ValueError: If ``verify_sequence`` rejects ``smiles``.
    """
    if not verify_sequence(smiles):
        raise ValueError("Error in calc_score: smiles is invalid.")

    mol = MolFromSmiles(smiles)
    log_p = Descriptors.MolLogP(mol)
    neg_sa = -sascorer.calculateScore(mol)

    # Ring penalty: largest cycle-basis cycle, counted beyond six atoms.
    basis = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(mol)))
    largest = max([len(cycle) for cycle in basis]) if basis else 0
    excess = largest - 6 if largest > 6 else 0
    cycle_term = -excess

    z_sa = (neg_sa - np.mean(SA_scores)) / np.std(SA_scores)
    z_log_p = (log_p - np.mean(logP_values)) / np.std(logP_values)
    z_cycle = (cycle_term - np.mean(cycle_scores)) / np.std(cycle_scores)
    return z_sa + z_log_p + z_cycle
Beispiel #12
0
def logP_score(m):
    """Return the normalised combined score of molecule ``m`` and track the best.

    The score is the sum of the standardised negated SA score, logP and
    negated large-ring penalty, using the mean/std constants read from the
    enclosing scope.  As a side effect the module-level ``count`` is
    incremented and ``max_score`` is replaced by ``[score, smiles]`` whenever
    the new score beats ``max_score[0]``.

    Args:
        m: Molecule object to score.

    Returns:
        The combined normalised score.

    Exits the process via ``sys.exit`` if the logP calculation fails.
    """
    global max_score
    global count

    try:
        logp = Descriptors.MolLogP(m)
    except Exception:
        # Catch ordinary errors only, so that KeyboardInterrupt / SystemExit
        # are not converted into this message.
        print(m, Chem.MolToSmiles(m))
        sys.exit('failed to make a molecule')

    SA_score = -sascorer.calculateScore(m)
    # RDKit ring info is used here so networkx is not required.
    cycle_list = m.GetRingInfo().AtomRings()
    largest_ring = max(len(ring) for ring in cycle_list) if cycle_list else 0
    cycle_score = -max(largest_ring - 6, 0)

    SA_score_norm = (SA_score - SA_mean) / SA_std
    logp_norm = (logp - logP_mean) / logP_std
    cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
    score_one = SA_score_norm + logp_norm + cycle_score_norm

    count += 1
    if score_one > max_score[0]:
        max_score = [score_one, Chem.MolToSmiles(m)]

    return score_one
Beispiel #13
0
def penalized_logp(molecule):
    """Compute the penalized logP of a molecule.

    Refactored from
    https://github.com/wengong-jin/icml18-jtnn/blob/master/bo/run_bo.py
    (Junction Tree Variational Autoencoder for Molecular Graph Generation,
    https://arxiv.org/pdf/1802.04364.pdf, Section 3.2).

    The value is ``y(m) = logP(m) - SA(m) - cycle(m)`` where ``logP(m)`` is
    the logP of the molecule, ``SA(m)`` its synthetic accessibility score and
    ``cycle(m)`` the amount by which the largest ring size exceeds six
    (zero when it does not).

    Args:
        molecule: Chem.Mol. A molecule.

    Returns:
        Float. The penalized logP value.
    """
    log_p = Descriptors.MolLogP(molecule)
    sa = sascorer.calculateScore(molecule)
    ring_excess = get_largest_ring_size(molecule) - 6
    ring_penalty = ring_excess if ring_excess > 0 else 0
    return log_p - sa - ring_penalty
Beispiel #14
0
def calc_score(mol):
    """Return the normalised combined score of ``mol``.

    Molecules whose molecular weight exceeds 500 score ``-1e10``.  Otherwise
    the result is the sum of the standardised negated SA score, logP and
    negated large-ring penalty (largest cycle-basis cycle beyond six atoms).

    Args:
        mol: Molecule object to score.

    Returns:
        The combined normalised score, or ``-1e10`` for heavy molecules.
    """
    logP_mean = 2.457  # np.mean(logP_values)
    logP_std = 1.434  # np.std(logP_values)
    SA_mean = -3.053  # np.mean(SA_scores)
    SA_std = 0.834  # np.std(SA_scores)
    cycle_mean = -0.048  # np.mean(cycle_scores)
    cycle_std = 0.287  # np.std(cycle_scores)

    molecule = mol
    if Descriptors.MolWt(molecule) > 500:
        return -1e10
    current_log_P_value = Descriptors.MolLogP(molecule)
    current_SA_score = -sascorer.calculateScore(molecule)
    cycle_list = nx.cycle_basis(nx.Graph(
        rdmolops.GetAdjacencyMatrix(molecule)))
    largest_cycle = max(len(j) for j in cycle_list) if cycle_list else 0
    current_cycle_score = -max(largest_cycle - 6, 0)

    current_SA_score_normalized = (current_SA_score - SA_mean) / SA_std
    current_log_P_value_normalized = (current_log_P_value -
                                      logP_mean) / logP_std
    current_cycle_score_normalized = (current_cycle_score -
                                      cycle_mean) / cycle_std

    score = (current_SA_score_normalized + current_log_P_value_normalized +
             current_cycle_score_normalized)
    # The computed score was previously discarded (the function fell off the
    # end and returned None); hand it back to the caller.
    return score
 def SA_evaluate(self, valid_smiles):
     """Return the list of SA scores, one per SMILES in ``valid_smiles``.

     Each string is parsed with ``Chem.MolFromSmiles`` and scored with
     ``calculateScore``; the output order matches the input order.
     """
     return [
         calculateScore(Chem.MolFromSmiles(smiles)) for smiles in valid_smiles
     ]
Beispiel #16
0
def _rdkit_eval(entry: dict) -> dict:
    """Add RDKit-derived properties to ``entry`` and return it.

    The molecule is parsed from ``entry['smiles']``; the keys ``'logP'``,
    ``'QED'`` and ``'SA_score'`` are then set, in that order, on the same
    dictionary that was passed in.
    """
    molecule = Chem.MolFromSmiles(entry['smiles'])
    property_functions = (
        ('logP', Crippen.MolLogP),
        ('QED', QED.qed),
        ('SA_score', calculateScore),
    )
    for key, compute in property_functions:
        entry[key] = compute(molecule)
    return entry
Beispiel #17
0
 def test1(self):
     """Check ``sascorer.calculateScore`` against the values in data/zim.100.txt.

     The file is read as tab-separated rows; the first row is discarded
     (presumably a header -- confirm against the data file).  Column 0 is
     the SMILES and column 2 the expected score, compared to 3 places.
     """
     # ``open`` in a ``with`` block replaces the Python 2-only ``file()``
     # builtin and guarantees the handle is closed.
     with open('data/zim.100.txt') as f:
         testData = [x.strip().split('\t') for x in f]
     testData.pop(0)
     for row in testData:
         smi = row[0]
         m = Chem.MolFromSmiles(smi)
         tgt = float(row[2])
         val = sascorer.calculateScore(m)
         # ``failUnlessAlmostEqual`` is a removed alias of this method.
         self.assertAlmostEqual(tgt, val, 3)
Beispiel #18
0
def get_sa(x):
    """Return the SA score of the SMILES string ``x``, or ``-1`` on failure.

    Any ordinary exception raised while parsing or scoring yields ``-1``.
    """
    try:
        return sascorer.calculateScore(Chem.MolFromSmiles(x))
    except Exception:
        # Deliberate best effort, but no longer swallows KeyboardInterrupt
        # or SystemExit the way a bare ``except:`` did.
        return -1
Beispiel #19
0
    def evaluate_individual(self, individual):
        """Return the SA score of ``individual`` as ``(score, [score])``.

        ``None`` is returned unchanged when ``individual`` is ``None``.
        Otherwise the individual's aromatic SMILES is parsed and scored with
        ``sascorer.calculateScore``.
        """
        if individual is None:
            return None

        smiles = individual.to_aromatic_smiles()
        sa_value = sascorer.calculateScore(MolFromSmiles(smiles))
        return sa_value, [sa_value]
 def add_synthetic_accessibility_score(self):
     """Store the synthetic accessibility score of every molecule in ``self.df``.

     Scores are computed for the entries of ``self.df['mols']``, written to
     ``self.df['sa_score']`` and their range is printed.

     Reference: https://doi.org/10.1186/1758-2946-1-8
     Module code: https://github.com/rdkit/rdkit/tree/master/Contrib/SA_Score
     """
     sa_score = []
     for mol in list(self.df['mols']):
         sa_score.append(sascorer.calculateScore(mol))
     self.df['sa_score'] = sa_score
     message = f'Synthetic accessibility score range: {min(sa_score)} -  {max(sa_score)}'
     print(message)
Beispiel #21
0
    def evaluate_individual(self, individual):
        """Return the penalized logP of ``individual`` as ``(score, [score])``.

        The score is logP minus the SA score minus the amount by which the
        largest ring size exceeds six (zero when it does not).
        """
        mol = MolFromSmiles(individual.to_aromatic_smiles())

        log_p = Descriptors.MolLogP(mol)
        sa = sascorer.calculateScore(mol)
        ring_excess = self.get_largest_ring_size(mol) - 6
        ring_penalty = ring_excess if ring_excess > 0 else 0

        penalized = log_p - sa - ring_penalty
        return penalized, [penalized]
Beispiel #22
0
 def test1(self):
     """Compare ``sascorer.calculateScore`` with the values in data/zim.100.txt.

     Rows are tab-separated; the first row is dropped, column 0 holds the
     SMILES and column 2 the expected score (checked to 3 places).
     """
     with open('data/zim.100.txt') as handle:
         rows = [line.strip().split('\t') for line in handle]
     del rows[0]
     for fields in rows:
         mol = Chem.MolFromSmiles(fields[0])
         expected = float(fields[2])
         self.assertAlmostEqual(expected, sascorer.calculateScore(mol), 3)
def calc_sa_score_smi(smi, verbose=False):
    """Return the synthetic accessibility score of the SMILES string ``smi``.

    When the string cannot be parsed into a molecule, ``None`` is returned
    (after printing a message if ``verbose`` is true).
    """
    parsed = Chem.MolFromSmiles(smi)
    if parsed is not None:
        return sascorer.calculateScore(parsed)

    # Parsing failed: optionally report it, then signal with None.
    if verbose:
        print("Error passing: %s" % smi)
    return None
Beispiel #24
0
 def test1(self):
   """Compare ``sascorer.calculateScore`` with the values in data/zim.100.txt.

   Rows are tab-separated; the first row is dropped, column 0 holds the
   SMILES and column 2 the expected score (checked to 3 places).
   """
   with open('data/zim.100.txt') as handle:
     rows = [line.strip().split('\t') for line in handle]
   del rows[0]
   for fields in rows:
     mol = Chem.MolFromSmiles(fields[0])
     expected = float(fields[2])
     self.assertAlmostEqual(expected, sascorer.calculateScore(mol), 3)
Beispiel #25
0
def check_node_type(new_compound, SA_mean, SA_std, logP_mean, logP_std,
                    cycle_mean, cycle_std):
    """Score every parseable candidate SMILES of at most 81 characters.

    Args:
        new_compound: Sequence of candidate SMILES strings.
        SA_mean, SA_std: Standardisation constants for the negated SA score.
        logP_mean, logP_std: Standardisation constants for logP.
        cycle_mean, cycle_std: Standardisation constants for the negated
            large-ring penalty.

    Returns:
        A tuple ``(node_index, score, valid_compound, all_smile)``.  The
        first three are parallel lists describing the accepted candidates
        (position in ``new_compound``, combined normalised score, SMILES);
        ``all_smile`` lists every candidate that was examined.
    """
    node_index = []
    valid_compound = []
    all_smile = []
    score = []

    for i, smiles in enumerate(new_compound):
        try:
            m = Chem.MolFromSmiles(str(smiles))
        except Exception:
            print(None)
            # Without this reset ``m`` would be undefined on the first
            # iteration or carry over the previous candidate's molecule.
            m = None

        if m is not None and len(smiles) <= 81:
            try:
                logp = Descriptors.MolLogP(m)
            except Exception:
                logp = -1000
            node_index.append(i)
            valid_compound.append(smiles)

            # Reuse the parsed molecule instead of parsing the string again.
            SA_score = -sascorer.calculateScore(m)
            cycle_list = nx.cycle_basis(
                nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
            largest_cycle = (max(len(cycle) for cycle in cycle_list)
                             if cycle_list else 0)
            cycle_score = -max(largest_cycle - 6, 0)

            SA_score_norm = (SA_score - SA_mean) / SA_std
            logp_norm = (logp - logP_mean) / logP_std
            cycle_score_norm = (cycle_score - cycle_mean) / cycle_std
            score.append(SA_score_norm + logp_norm + cycle_score_norm)

        all_smile.append(smiles)

    return node_index, score, valid_compound, all_smile
    def filtered_sa(self, valid_smiles):
        """Drop every SMILES whose SA score is greater than 5.

        The list is filtered in place and the same list object is returned,
        so callers holding a reference see the filtered contents.  The
        number of removed entries is printed.

        Args:
            valid_smiles: Mutable list of SMILES strings.

        Returns:
            ``valid_smiles`` after filtering.
        """
        # Build the survivors separately: removing from a list while
        # iterating over it skips the element that follows each removal.
        kept = []
        for smiles in valid_smiles:
            mol = Chem.MolFromSmiles(smiles)
            if calculateScore(mol) > 5:
                continue
            kept.append(smiles)

        count = len(valid_smiles) - len(kept)
        valid_smiles[:] = kept
        print("unavaliable_SA_mol:%i" % count)

        return valid_smiles
def worker(syba_model, my_syba_model, scscore_model, smi):
    """Compute four scores for the SMILES string ``smi``.

    Args:
        syba_model: Object whose ``predict(smi)`` gives the first score.
        my_syba_model: Object whose ``predict(smi)`` gives the second score.
        scscore_model: Object whose ``get_score_from_smi(smi)[1]`` gives the
            fourth score.
        smi: SMILES string to score.

    Returns:
        The tuple ``(syba_score, my_syba_score, sa_score, sc_score)``.  A
        ``predict`` call that raises contributes ``0``; failures in the SA
        or SC score calculation propagate to the caller.
    """
    # ``except Exception`` keeps the best-effort fallback without swallowing
    # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
    try:
        syba_score = syba_model.predict(smi)
    except Exception:
        syba_score = 0
    try:
        my_syba_score = my_syba_model.predict(smi)
    except Exception:
        my_syba_score = 0
    sa_score = sa.calculateScore(Chem.MolFromSmiles(smi))
    sc_score = scscore_model.get_score_from_smi(smi)[1]
    return syba_score, my_syba_score, sa_score, sc_score
Beispiel #28
0
 def test1(self):
     """Check ``sascorer.calculateScore`` against the values in data/zim.100.txt.

     The file is read as tab-separated rows; the first row is discarded
     (presumably a header -- confirm against the data file).  Column 0 is
     the SMILES and column 2 the expected score, compared to 3 places.
     """
     # ``open`` in a ``with`` block replaces the Python 2-only ``file()``
     # builtin and guarantees the handle is closed.
     with open('data/zim.100.txt') as f:
         testData = [x.strip().split('\t') for x in f]
     testData.pop(0)
     for row in testData:
         smi = row[0]
         m = Chem.MolFromSmiles(smi)
         tgt = float(row[2])
         val = sascorer.calculateScore(m)
         # ``failUnlessAlmostEqual`` is a removed alias of this method.
         self.assertAlmostEqual(tgt, val, 3)
Beispiel #29
0
def simulation(chem_model, state, node):
    """Generate one compound from ``state`` and return its squashed score.

    ``chem_kn_simulation`` / ``predict_smile`` / ``make_input_smile`` produce
    candidate compounds; only the first is scored.  The raw score is the sum
    of the negated SA score, logP and negated large-ring penalty (no
    normalisation), mapped to ``x / (1 + |x|)``.  A compound that cannot be
    parsed gets the fixed value ``-1000 / (1 + 1000)``.

    Args:
        chem_model: Model handed to ``chem_kn_simulation``.
        state: Current state handed to ``chem_kn_simulation``.
        node: Not used in the body.

    Returns:
        The squashed score of the first generated compound.
    """
    val = [
        '\n', '&', 'C', '(', ')', 'c', '1', '2', 'o', '=', 'O', 'N', '3', 'F',
        '[C@@H]', 'n', '-', '#', 'S', 'Cl', '[O-]', '[C@H]', '[NH+]', '[C@]',
        's', 'Br', '/', '[nH]', '[NH3+]', '4', '[NH2+]', '[C@@]', '[N+]',
        '[nH+]', '\\', '[S@]', '5', '[N-]', '[n+]', '[S@@]', '[S-]', '6', '7',
        'I', '[n-]', 'P', '[OH+]', '[NH-]', '[P@@H]', '[P@@]', '[PH2]', '[P@]',
        '[P+]', '[S+]', '[o+]', '[CH2-]', '[CH-]', '[SH+]', '[O+]', '[s+]',
        '[PH+]', '[PH]', '8', '[S@@+]'
    ]
    all_posible = chem_kn_simulation(chem_model, state, val)
    generate_smile = predict_smile(all_posible, val)
    new_compound = make_input_smile(generate_smile)

    # Best effort: any ordinary failure (including an empty compound list)
    # is treated as "no molecule".
    try:
        m = Chem.MolFromSmiles(str(new_compound[0]))
    except Exception:
        m = None

    if m is None:
        return -1000 / (1 + 1000)

    try:
        logp = Descriptors.MolLogP(m)
    except Exception:
        logp = -1000

    # Reuse the parsed molecule rather than parsing the SMILES again.
    SA_score = -sascorer.calculateScore(m)
    cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(m)))
    largest_cycle = max(len(cycle) for cycle in cycle_list) if cycle_list else 0
    cycle_score = -max(largest_cycle - 6, 0)

    # The three terms are summed without normalisation.
    score_one = SA_score + logp + cycle_score
    return score_one / (1 + abs(score_one))
def _penalized_logp_cyclebasis(mol: Mol, dataset: str):
    """Return the normalised penalized logP using a networkx cycle basis.

    Each of logP, SA score and ring penalty is standardised with its
    module-level mean/std constant; the result is standardised logP minus
    standardised SA score minus standardised ring penalty.  The ring penalty
    is how far the largest cycle-basis cycle exceeds six atoms.
    ``dataset`` is not used in the body.
    """
    raw_log_p = Descriptors.MolLogP(mol)
    raw_sa = sascorer.calculateScore(mol)

    adjacency = Chem.rdmolops.GetAdjacencyMatrix(mol)
    basis = nx.cycle_basis(nx.Graph(adjacency))
    largest_ring_size = max(map(len, basis), default=0)
    raw_cycle = largest_ring_size - 6 if largest_ring_size > 6 else 0

    z_log_p = (raw_log_p - LOGP_MEAN) / LOGP_STD
    z_sa = (raw_sa - SASCORE_MEAN) / SASCORE_STD
    z_cycle = (raw_cycle - CYCLEBASIS_CYCLESCORE_MEAN) / CYCLEBASIS_CYCLESCORE_STD

    return z_log_p - z_sa - z_cycle
def _penalized_logp_atomrings(mol: Mol, dataset: str):
    """Return the normalised penalized logP using RDKit ring information.

    Each of logP, SA score and ring penalty is standardised with its
    module-level mean/std constant; the result is standardised logP minus
    standardised SA score minus standardised ring penalty.  The ring penalty
    is how far the largest ring reported by ``GetRingInfo().AtomRings()``
    exceeds six atoms.  ``dataset`` is not used in the body.
    """
    raw_log_p = Descriptors.MolLogP(mol)
    raw_sa = sascorer.calculateScore(mol)

    rings = mol.GetRingInfo().AtomRings()
    largest_ring_size = max(map(len, rings), default=0)
    raw_cycle = largest_ring_size - 6 if largest_ring_size > 6 else 0

    z_log_p = (raw_log_p - LOGP_MEAN) / LOGP_STD
    z_sa = (raw_sa - SASCORE_MEAN) / SASCORE_STD
    z_cycle = (raw_cycle - ATOMRING_CYCLESCORE_MEAN) / ATOMRING_CYCLESCORE_STD

    return z_log_p - z_sa - z_cycle
Beispiel #32
0
 def runSA(ifn, ofn, error_ofn):
     """Append an SA score column to every line of ``ifn``.

     The first whitespace-separated token of each input line is taken as the
     SMILES.  On success the original line (right-stripped) plus a tab and
     the score is written to ``ofn``; on any exception a tab-separated
     ``smiles, exception args`` line is written to ``error_ofn``.

     Args:
         ifn: Path of the input file.
         ofn: Path of the output file (overwritten).
         error_ofn: Path of the error log file (overwritten).
     """
     with open(error_ofn, 'w') as err_ofs, \
             open(ofn, 'w') as ofs, \
             open(ifn, 'r') as ifs:
         for line in ifs:
             record = line.rstrip()
             # Default to the whole line so the error log never reports a
             # stale SMILES (or hits NameError) when the split itself fails,
             # e.g. on a blank line.
             smi = record
             try:
                 smi = line.split()[0]
                 m = Chem.MolFromSmiles(smi)
                 sa = sascorer.calculateScore(m)
                 ofs.write("%s\t%f\n" % (record, sa))
             except Exception as inst:
                 err_ofs.write("%s\t%s\n" % (smi, inst.args))
Beispiel #33
0
def penalized_logp(molecule):
    """Compute the penalized logP of a molecule.

    Refactored from
    https://github.com/wengong-jin/icml18-jtnn/blob/master/bo/run_bo.py
    (Junction Tree Variational Autoencoder for Molecular Graph Generation,
    https://arxiv.org/pdf/1802.04364.pdf, Section 3.2).

    The value is ``y(m) = logP(m) - SA(m) - cycle(m)`` where ``logP(m)`` is
    the logP of the molecule, ``SA(m)`` its synthetic accessibility score and
    ``cycle(m)`` the amount by which the largest ring size exceeds six
    (zero when it does not).

    Args:
      molecule: Chem.Mol. A molecule.
    Returns:
      Float. The penalized logP value.
    """
    log_p = Descriptors.MolLogP(molecule)
    sa = sascorer.calculateScore(molecule)
    ring_excess = get_largest_ring_size(molecule) - 6
    ring_penalty = ring_excess if ring_excess > 0 else 0
    return log_p - sa - ring_penalty


## !!!!!!!!!! Need to check whether this should be used !!!!!!!!!! ##
# def num_long_cycles(mol):
#   """Calculate the number of long cycles.
#   Args:
#     mol: Molecule. A molecule.
#   Returns:
#     negative cycle length.
#   """
#   cycle_list = nx.cycle_basis(nx.Graph(Chem.rdmolops.GetAdjacencyMatrix(mol)))
#   if not cycle_list:
#     cycle_length = 0
#   else:
#     cycle_length = max([len(j) for j in cycle_list])
#   if cycle_length <= 6:
#     cycle_length = 0
#   else:
#     cycle_length = cycle_length - 6
#   return -cycle_length
#
#
# def penalized_logp(molecule):
#   log_p = Descriptors.MolLogP(molecule)
#   sas_score = SA_Score.sascorer.calculateScore(molecule)
#   cycle_score = num_long_cycles(molecule)
#   return log_p - sas_score + cycle_score
Beispiel #34
0
        # pred, uncert = sgp.predict(X_train, 0 * X_train)
        # error = np.sqrt(np.mean((pred - y_train)**2))
        # trainll = np.mean(sps.norm.logpdf(pred - y_train, scale = np.sqrt(uncert)))
        # print 'Train RMSE: ', error
        # print 'Train ll: ', trainll    

        next_inputs = sgp.batched_greedy_ei(50, np.min(X_train, 0), np.max(X_train, 0))        
        valid_smiles_final = decode_from_latent_space(next_inputs, model)
        save_object(valid_smiles_final, "%s/valid_smiles-seed-%d-iter-%d.dat" % (cmd_args.save_dir, args.seed, iteration))
        new_features = next_inputs

        scores = []
        for i in range(len(valid_smiles_final)):
            if valid_smiles_final[ i ] is not None:
                current_log_P_value = Descriptors.MolLogP(MolFromSmiles(valid_smiles_final[ i ]))
                current_SA_score = -sascorer.calculateScore(MolFromSmiles(valid_smiles_final[ i ]))
                cycle_list = nx.cycle_basis(nx.Graph(rdmolops.GetAdjacencyMatrix(MolFromSmiles(valid_smiles_final[ i ]))))
                if len(cycle_list) == 0:
                    cycle_length = 0
                else:
                    cycle_length = max([ len(j) for j in cycle_list ])
                if cycle_length <= 6:
                    cycle_length = 0
                else:
                    cycle_length = cycle_length - 6

                current_cycle_score = -cycle_length

                current_SA_score_normalized = (current_SA_score - np.mean(SA_scores)) / np.std(SA_scores)
                current_log_P_value_normalized = (current_log_P_value - np.mean(logP_values)) / np.std(logP_values)
                current_cycle_score_normalized = (current_cycle_score - np.mean(cycle_scores)) / np.std(cycle_scores)