def sumScore(df, key, outputs, output_file_name):
    """Gather one score per output file, merge the scores into ``df`` and export.

    For every path in ``outputs``:

    * the score is obtained by calling ``getScoreFromSmina`` on the sibling
      file ``<stem>_score.txt``, where ``<stem>`` is the file name cut at its
      first dot (the directory part of the path is kept unchanged);
    * the compound identifier is the part of the file name that precedes the
      first underscore.

    The (identifier, score) pairs are left-merged into ``df`` on ``key``, so
    every row of ``df`` is kept. Rows without a score get the placeholder
    value 999, and the result is sorted by ascending score before being
    written with ``PandasTools.SaveXlsxFromFrame``.

    Args:
        df: DataFrame holding the compounds. It must contain the column
            ``key`` and a ``mol`` column, which is passed as ``molCol`` when
            the spreadsheet is written.
        key: Name of the identifier column used for the merge.
        outputs: Iterable of output file paths using ``/`` as separator.
        output_file_name: Path of the .xlsx file to create.

    Returns:
        The merged DataFrame, sorted by ascending ``score``.
    """
    print('')
    print('Step 6')
    print('###############################')
    print('正在统计化合物打分!!!')

    drugbank_id_list = []
    scores = []
    for output in outputs:
        directory, sep, file_name = output.rpartition('/')
        # Cut at the first dot of the file name only. Splitting the whole
        # path would truncate it at any dot in a directory component, e.g.
        # './results/ID_out.pdbqt' would collapse to '' + '_score.txt'.
        score_path = directory + sep + file_name.split('.')[0] + '_score.txt'
        scores.append(getScoreFromSmina(score_path))
        drugbank_id_list.append(file_name.split('_')[0])

    df_tmp = pd.DataFrame({key: drugbank_id_list, 'score': scores})
    df_final = pd.merge(df, df_tmp, how='left', on=key)

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on `df_final.score`: with pandas Copy-on-Write that chained in-place
    # call does not modify df_final, leaving the NaN values in place.
    df_final['score'] = df_final['score'].fillna(999)
    df_final = df_final.sort_values('score', ascending=True)

    print('正在生成化合物结构!!!')
    PandasTools.SaveXlsxFromFrame(df_final, output_file_name, molCol='mol')
    print('共得到%s个化合物得分!!!' % (df_final.shape[0]))
    print('###############################')
    return df_final
def makespreadsheet(thesmiles, output_file_name='test.xlsx'):
    """Write an Excel sheet listing SMILES strings next to their molecules.

    ``thesmiles`` is split into lines; each non-blank line is stripped of
    surrounding whitespace and treated as one SMILES string. The resulting
    frame has a ``SMILES`` column and a ``Mol Image`` column holding the
    result of ``Chem.MolFromSmiles`` for each string, and is saved with
    ``PandasTools.SaveXlsxFromFrame`` using ``Mol Image`` as ``molCol``.

    Args:
        thesmiles: Text containing one SMILES string per line.
        output_file_name: Path of the .xlsx file to create. Defaults to
            ``'test.xlsx'``.
    """
    # splitlines() copes with '\n' and '\r\n' alike; blank lines (including
    # the one produced by a trailing newline) are skipped so they do not
    # become empty rows in the spreadsheet.
    stripped_lines = (line.strip() for line in thesmiles.splitlines())
    smiles = [entry for entry in stripped_lines if entry]

    df = pd.DataFrame({'SMILES': smiles})
    df['Mol Image'] = [Chem.MolFromSmiles(entry) for entry in smiles]
    PandasTools.SaveXlsxFromFrame(df, output_file_name, molCol='Mol Image')
'Oc(cccc1)c1O', 'OC(C(C1)C1(Br)Br)=O', 'Cc(cc1)ccc1O', 'Oc(cc1)ccc1Cl', 'OC(c1cocc1)=O', 'CC(C)c1nnn[nH]1', 'CN(C=C1)C=CC1=N', 'CCc1cccnn1', 'C[C@H]([C@H]1NC)[C@@H]1NC', 'CCCc1ncc[nH]1' ] # 化合物のラベルを作成 label_list = ['sample_{}'.format(i) for i in range(len(smiles_list))] # molオブジェクトのリストを作成 mols_list = [Chem.MolFromSmiles(smile) for smile in smiles_list] # RDkit記述子の作成 descriptor_names = [ descriptor_name[0] for descriptor_name in Descriptors._descList[:5] ] descriptor_calculation = MoleculeDescriptors.MolecularDescriptorCalculator( descriptor_names) RDkit = [ descriptor_calculation.CalcDescriptors(mol_temp) for mol_temp in mols_list ] df = pd.DataFrame(RDkit, columns=descriptor_names, index=label_list) df['smiles'] = smiles_list # DataFrameへのImageの追加とエクセルファイルでの出力 PandasTools.AddMoleculeColumnToFrame(df, molCol='IMAGE', smilesCol='smiles') PandasTools.SaveXlsxFromFrame(df, 'data_frame.xlsx', molCol='IMAGE', size=(150, 150))