def prepare(args: dict, overwriting: bool):
    """Set up a descriptor run and build the fingerprint transformer.

    Creates the run directory through ``make_dir`` and the log files through
    ``create_log_files``, loads configuration and key from ``args``, then
    wraps a ``DescriptorCalculator`` in a ``DfTransformer``.

    Args:
        args (dict): argparser arguments; ``args["number_cpu"]`` is passed to
            the transformer as ``nproc``.
        overwriting (bool): overwriting flag, forwarded to ``make_dir``.

    Returns:
        tuple: ``(output_dir, dt)`` where ``output_dir`` is the value returned
        by ``make_dir`` and ``dt`` is the configured ``DfTransformer``.
    """
    output_dir = make_dir(args, "results_tmp", "descriptors", overwriting)
    create_log_files(output_dir)

    # Presumably these populate ConfigDict / SecretDict, which are read back
    # right below -- keep them ahead of the lookups.
    load_config(args)
    load_key(args)
    fingerprint_params = ConfigDict.get_parameters()["fingerprint"]
    secret_key = SecretDict.get_secrets()["key"]

    calculator = DescriptorCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=fingerprint_params,
        verbosity=0,
    )

    # Output column name -> dtype, kept together so the two lists handed to
    # DfTransformer cannot drift apart (dicts preserve insertion order).
    column_types = {
        "fp_feat": "object",
        "fp_val": "object",
        "success": "bool",
        "error_message": "object",
    }
    transformer = DfTransformer(
        calculator,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=list(column_types),
        output_types=list(column_types.values()),
        success_column="success",
        nproc=args["number_cpu"],
        verbosity=0,
    )
    return output_dir, transformer
def test_calculate_desc_multiple(self):
    """Compare descriptors computed for ten input rows with the stored reference CSV."""
    # NOTE(review): the reference path is relative to the working directory,
    # unlike the curDir-based paths below -- confirm the tests run from the
    # expected directory.
    reference_csv = "unit_test/output/test_calculate_desc_y2.csv"
    produced_csv = curDir / "output/tmp/ecfp_feat_multiple.csv"

    smiles_frame = read_csv(
        curDir / "input/chembl/chembl_23_example_T2.csv", nrows=10
    )
    calculator = DescriptorCalculator.from_param_dict(
        secret=self.keys["key"],
        method_param_dict=self.config["fingerprint"],
        verbosity=0,
    )

    # Output column name -> dtype (insertion order defines the column order).
    column_types = {
        "fp_feat": "object",
        "fp_val": "object",
        "success": "bool",
        "error_message": "object",
    }
    transformer = DfTransformer(
        calculator,
        input_columns={"smiles": "smiles"},
        output_columns=list(column_types),
        output_types=list(column_types.values()),
        success_column="success",
        nproc=1,
        verbosity=0,
    )

    # To regenerate the reference fingerprints, write the first element of
    # transformer.process_dataframe(smiles_frame) to reference_csv with
    # index=False.
    transformer.process_dataframe(smiles_frame)[0].to_csv(produced_csv, index=False)

    # shallow=False forces a comparison of file contents, not just os.stat data.
    files_match = filecmp.cmp(reference_csv, produced_csv, shallow=False)
    self.assertEqual(files_match, True)
def test_scramble_desc_multiple_key(self):
    """Check that the scrambled descriptor output depends on the input key."""
    alternate_key = "melloddy_2"
    # Same reference file as the unscrambled-key test; presumably it was
    # generated with the default key -- the output for a different key must
    # therefore not match it.
    reference_csv = "unit_test/output/test_calculate_desc_y2.csv"
    produced_csv = curDir / "output/tmp/ecfp_feat_scrambled_new_key.csv"

    smiles_frame = read_csv(
        curDir / "input/chembl/chembl_23_example_T2.csv", nrows=10
    )
    calculator = DescriptorCalculator.from_param_dict(
        secret=alternate_key,
        method_param_dict=self.config["fingerprint"],
        verbosity=0,
    )

    # Output column name -> dtype (insertion order defines the column order).
    column_types = {
        "fp_feat": "object",
        "fp_val": "object",
        "success": "bool",
        "error_message": "object",
    }
    transformer = DfTransformer(
        calculator,
        input_columns={"smiles": "smiles"},
        output_columns=list(column_types),
        output_types=list(column_types.values()),
        success_column="success",
        nproc=1,
        verbosity=0,
    )

    # Write the fingerprints obtained with the alternate key.
    transformer.process_dataframe(smiles_frame)[0].to_csv(produced_csv, index=False)

    # Content-level comparison; the two files are expected to differ.
    files_match = filecmp.cmp(reference_csv, produced_csv, shallow=False)
    self.assertEqual(files_match, False)
def prepare(args):
    """Create the reference-set output directory and three single-process transformers.

    Loads configuration and key from ``args``, creates the output directory
    through ``make_dir`` (always with overwriting enabled), and builds one
    ``DfTransformer`` each around a ``Standardizer``, a ``ScaffoldFoldAssign``
    and a ``DescriptorCalculator``.

    Args:
        args: arguments forwarded to ``load_config``, ``load_key`` and
            ``make_dir``.

    Returns:
        tuple: ``(output_dir, standardizer transformer, fold transformer,
        descriptor transformer)``.
    """

    def single_process_transformer(calculator, source_column, column_types):
        # Shared DfTransformer wiring: only the wrapped calculator, the input
        # column name and the output column -> dtype mapping vary.
        return DfTransformer(
            calculator,
            input_columns={source_column: "smiles"},
            output_columns=list(column_types),
            output_types=list(column_types.values()),
            success_column="success",
            nproc=1,
            verbosity=0,
        )

    # Presumably these populate ConfigDict / SecretDict, which are read back
    # below -- keep them first.
    load_config(args)
    load_key(args)
    output_dir = make_dir(args, "reference_set", None, True)
    secret_key = SecretDict.get_secrets()["key"]

    # Standardization reads the raw "smiles" column.
    standardizer = Standardizer.from_param_dict(
        method_param_dict=ConfigDict.get_parameters()["standardization"],
        verbosity=0,
    )
    standardize_transformer = single_process_transformer(
        standardizer,
        "smiles",
        {"canonical_smiles": "object", "success": "bool", "error_message": "object"},
    )

    # Fold assignment reads the "canonical_smiles" column.
    fold_assigner = ScaffoldFoldAssign.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["scaffold_folding"],
        verbosity=0,
    )
    fold_transformer = single_process_transformer(
        fold_assigner,
        "canonical_smiles",
        {
            "murcko_smiles": "object",
            "sn_smiles": "object",
            "fold_id": "int",
            "success": "bool",
            "error_message": "object",
        },
    )

    # Descriptor calculation also reads the "canonical_smiles" column.
    descriptor_calculator = DescriptorCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["fingerprint"],
        verbosity=0,
    )
    descriptor_transformer = single_process_transformer(
        descriptor_calculator,
        "canonical_smiles",
        {
            "fp_feat": "object",
            "fp_val": "object",
            "success": "bool",
            "error_message": "object",
        },
    )

    return output_dir, standardize_transformer, fold_transformer, descriptor_transformer
def test_calculate_desc_val_single(self):
    """Compare the saved second element of a single-molecule result with the reference file."""
    # NOTE(review): the reference path is relative to the working directory,
    # unlike the curDir-based output path -- confirm the tests run from the
    # expected directory.
    reference_npy = "unit_test/output/test_calculate_desc_val.npy"
    produced_npy = curDir / "output/tmp/ecfp_val.npy"
    smiles = "Cc1ccc(S(=O)(=O)Nc2ccc(-c3nc4cc(NS(=O)(=O)c5ccc(C)cc5)ccc4[nH]3)cc2)cc1"

    calculator = DescriptorCalculator.from_param_dict(
        secret=self.keys["key"],
        method_param_dict=self.config["fingerprint"],
        verbosity=0,
    )

    # Index 1 of the calculate_single result -- presumably the fingerprint
    # values, going by the original variable name; confirm against
    # DescriptorCalculator.
    fingerprint_values = calculator.calculate_single(smiles)[1]

    # To regenerate the reference, np.save fingerprint_values to reference_npy.
    np.save(produced_npy, fingerprint_values)

    # shallow=False forces a comparison of file contents, not just os.stat data.
    files_match = filecmp.cmp(reference_npy, produced_npy, shallow=False)
    self.assertEqual(files_match, True)