def prepare(args: dict, overwriting: bool):
    """Set up an LSH folding run.

    Creates the output and mapping-table directories together with their
    log files, loads the configuration and the key, and builds the
    DfTransformer that wraps the LSH folding calculator.

    Args:
        args (dict): argparser arguments. Forwarded to ``make_dir``,
            ``load_config`` and ``load_key``; ``args["number_cpu"]`` is
            passed to the transformer as ``nproc``.
        overwriting (bool): overwriting flag forwarded to ``make_dir``.

    Returns:
        tuple: ``(output_dir_lsh, mapping_table_dir, dt)`` - the two values
            returned by ``make_dir`` for the LSH folding and mapping table
            directories, and the configured DfTransformer.
    """
    # Directories and their log files are set up before config/key loading.
    lsh_dir = make_dir(args, "results_tmp", "lsh_folding", overwriting)
    table_dir = make_dir(args, "mapping_table", None, overwriting)
    for directory in (lsh_dir, table_dir):
        create_log_files(directory)

    load_config(args)
    load_key(args)

    # LSH settings are merged on top of the fingerprint settings, so on
    # duplicate keys the LSH value wins.
    fingerprint_params = ConfigDict.get_parameters()["fingerprint"]
    lsh_params = ConfigDict.get_parameters()["lsh"]
    merged_params = {**fingerprint_params, **lsh_params}

    secret_key = SecretDict.get_secrets()["key"]
    calculator = LSHFoldingCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=merged_params,
        verbosity=0,
    )

    # Output column name paired with the dtype declared for it.
    column_spec = [
        ("fp_feat", "object"),
        ("fp_val", "object"),
        ("fold_id", "object"),
        ("success", "bool"),
        ("error_message", "object"),
    ]
    transformer = DfTransformer(
        calculator,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=[name for name, _ in column_spec],
        output_types=[dtype for _, dtype in column_spec],
        success_column="success",
        nproc=args["number_cpu"],
        verbosity=0,
    )
    return lsh_dir, table_dir, transformer
def setUp(self):
    """Load the example parameters and secrets from ``reference_files``.

    Stores the result of ``ConfigDict(...).get_parameters()`` on
    ``self.config`` and the result of ``SecretDict(...).get_secrets()`` on
    ``self.keys``.
    """
    reference_dir = os.path.join(curDir, "reference_files")
    parameters_file = Path(
        os.path.join(reference_dir, "example_parameters.json"))
    key_file = Path(os.path.join(reference_dir, "example_key.json"))

    self.config = ConfigDict(config_path=parameters_file).get_parameters()
    self.keys = SecretDict(key_path=key_file).get_secrets()
def prepare(args):
    """Set up the reference-set run and build its three transformers.

    Loads the configuration and the key, creates the ``reference_set``
    output directory (always with the overwriting flag set to ``True``) and
    instantiates single-process DfTransformers for standardization,
    scaffold-based fold assignment and descriptor calculation.

    Args:
        args (dict): argparser arguments, forwarded to ``load_config``,
            ``load_key`` and ``make_dir``.

    Returns:
        tuple: ``(output_dir, dt_standardizer, dt_fold, dt_descriptor)`` -
            the value returned by ``make_dir`` followed by the three
            DfTransformer instances.
    """
    load_config(args)
    load_key(args)
    reference_dir = make_dir(args, "reference_set", None, True)
    secret_key = SecretDict.get_secrets()["key"]

    # Keyword arguments shared by all three transformers.
    shared_kwargs = {"success_column": "success", "nproc": 1, "verbosity": 0}

    # Step 1: standardization, raw "smiles" -> "canonical_smiles".
    standardizer = Standardizer.from_param_dict(
        method_param_dict=ConfigDict.get_parameters()["standardization"],
        verbosity=0,
    )
    standardizer_spec = [
        ("canonical_smiles", "object"),
        ("success", "bool"),
        ("error_message", "object"),
    ]
    standardizer_transformer = DfTransformer(
        standardizer,
        input_columns={"smiles": "smiles"},
        output_columns=[name for name, _ in standardizer_spec],
        output_types=[dtype for _, dtype in standardizer_spec],
        **shared_kwargs,
    )

    # Step 2: scaffold-based fold assignment on the canonical SMILES.
    fold_assigner = ScaffoldFoldAssign.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["scaffold_folding"],
        verbosity=0,
    )
    fold_spec = [
        ("murcko_smiles", "object"),
        ("sn_smiles", "object"),
        ("fold_id", "int"),
        ("success", "bool"),
        ("error_message", "object"),
    ]
    fold_transformer = DfTransformer(
        fold_assigner,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=[name for name, _ in fold_spec],
        output_types=[dtype for _, dtype in fold_spec],
        **shared_kwargs,
    )

    # Step 3: descriptor (fingerprint) calculation on the canonical SMILES.
    descriptor_calculator = DescriptorCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["fingerprint"],
        verbosity=0,
    )
    descriptor_spec = [
        ("fp_feat", "object"),
        ("fp_val", "object"),
        ("success", "bool"),
        ("error_message", "object"),
    ]
    descriptor_transformer = DfTransformer(
        descriptor_calculator,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=[name for name, _ in descriptor_spec],
        output_types=[dtype for _, dtype in descriptor_spec],
        **shared_kwargs,
    )

    return (
        reference_dir,
        standardizer_transformer,
        fold_transformer,
        descriptor_transformer,
    )
def prepare(args):
    """Set up a scaffold-based folding run.

    Creates the output and mapping-table directories together with their
    log files, loads the configuration and the key, and instantiates the
    DfTransformer that wraps the scaffold fold assignment.

    Args:
        args (dict): argparser arguments. ``args["non_interactive"]`` is
            passed to ``make_dir`` as its last argument and
            ``args["number_cpu"]`` is passed to the transformer as
            ``nproc``.

    Returns:
        tuple: ``(output_dir, mapping_table_dir, dt)`` - the two values
            returned by ``make_dir`` for the folding and mapping table
            directories, and the instantiated DfTransformer for scaffold
            folding.
    """
    non_interactive = args["non_interactive"]
    folding_dir = make_dir(args, "results_tmp", "folding", non_interactive)
    table_dir = make_dir(args, "mapping_table", None, non_interactive)
    for directory in (folding_dir, table_dir):
        create_log_files(directory)

    load_config(args)
    load_key(args)

    secret_key = SecretDict.get_secrets()["key"]
    folding_params = ConfigDict.get_parameters()["scaffold_folding"]
    fold_assigner = ScaffoldFoldAssign.from_param_dict(
        secret=secret_key,
        method_param_dict=folding_params,
        verbosity=0,
    )

    # Output column name paired with the dtype declared for it.
    column_spec = [
        ("murcko_smiles", "object"),
        ("sn_smiles", "object"),
        ("fold_id", "int"),
        ("success", "bool"),
        ("error_message", "object"),
    ]
    transformer = DfTransformer(
        fold_assigner,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=[name for name, _ in column_spec],
        output_types=[dtype for _, dtype in column_spec],
        success_column="success",
        nproc=args["number_cpu"],
        verbosity=0,
    )
    return folding_dir, table_dir, transformer