# Score every candidate SMILES string in `valid_smiles_final` by its QED
# value, normalized with the mean and standard deviation of the reference
# QED values loaded from disk, then persist the resulting scores.
#
# Produces (module-level names):
#   targets     - normalized reference QED values.
#   reg_scores  - one list per entry of `valid_smiles_final`, holding the
#                 NEGATED normalized QED of each molecule in that entry.
#   qed_scores  - same layout, holding the raw (un-normalized) QED values.
#
# NOTE(review): `qed.default` is presumably RDKit's QED estimator and is
# given whatever `MolFromSmiles` returns; the candidates are assumed to
# have been validated upstream - confirm.
qed_values = np.loadtxt('../solo_qed_features_and_targets/qed_values.txt')

# The normalization statistics depend only on the reference values, so they
# are computed once here instead of once per scored molecule.
qed_values_mean = np.mean(qed_values)
qed_values_std = np.std(qed_values)

qed_values_normalized = (np.array(qed_values) - qed_values_mean) / qed_values_std
targets = qed_values_normalized

reg_scores = []  # collect scores for objective function
qed_scores = []  # collect scores for qed term in objective function

for i, candidate_smiles in enumerate(valid_smiles_final):
    to_add = []
    qed_store = []
    # An empty entry simply contributes two empty lists.
    for candidate in candidate_smiles:
        current_qed_value = qed.default(MolFromSmiles(candidate))
        current_qed_value_normalized = (
            current_qed_value - qed_values_mean) / qed_values_std
        score = current_qed_value_normalized
        # The sign is flipped before storing; the raw value is kept separately.
        to_add.append(-score)
        qed_store.append(current_qed_value)
    reg_scores.append(to_add)
    qed_scores.append(qed_store)
    print(i)  # progress indicator

print(valid_smiles_final)
print(reg_scores)

save_object(reg_scores,
            "results_QED_solo/reg_scores{}.dat".format(iteration))
# NOTE(review): this first statement looks like the body of a loop whose
# header lies above the visible chunk - confirm its indentation there.
smiles[i] = smiles[i].strip()

# We load the auto-encoder
preproc = lasp.PreProcessing(dataset='drugs')
enc_dec = lasp.EncoderDecoder()
encoder, decoder = enc_dec.get_functions()

# Pass 1: round-trip every SMILES string through MolFromSmiles / MolToSmiles.
smiles_rdkit = []
for i, raw_smiles in enumerate(smiles):
    parsed_molecule = MolFromSmiles(raw_smiles)
    smiles_rdkit.append(MolToSmiles(parsed_molecule))
    print(i)

# Pass 2: QED value of each molecule, re-parsed from its round-tripped string.
qed_values = []
for i, rdkit_smiles in enumerate(smiles_rdkit):
    reparsed_molecule = MolFromSmiles(rdkit_smiles)
    qed_values.append(qed.default(reparsed_molecule))
    print(i)

# Normalize the QED values: subtract their mean, divide by their std.
qed_values_centered = np.array(qed_values) - np.mean(qed_values)
qed_values_normalized = qed_values_centered / np.std(qed_values)

# Pass 3: one-hot encode each round-tripped SMILES string.
smiles_one_hot_encoding = []
for i, rdkit_smiles in enumerate(smiles_rdkit):
    one_hot = preproc.smilelist_to_one_hot(rdkit_smiles)
    smiles_one_hot_encoding.append(one_hot)
    print(i)

# Pass 4: feed each encoding to the encoder as a single-element list and keep
# element [0][0] of what it returns.
latent_points = []
for i, one_hot in enumerate(smiles_one_hot_encoding):
    encoder_output = encoder([one_hot])
    latent_points.append(encoder_output[0][0])
    print(i)