コード例 #1
0
def prepare(args: dict, overwriting: bool) -> tuple:
    """Set up a descriptor run and build the dataframe transformer for it.

    Creates the run directory via ``make_dir`` and its log files via
    ``create_log_files``, loads the configuration and the key referenced by
    ``args``, and wraps a ``DescriptorCalculator`` in a ``DfTransformer``.

    Args:
        args (dict): argparser arguments. ``args["number_cpu"]`` is passed to
            the transformer as ``nproc``.
        overwriting (bool): overwriting flag, forwarded to ``make_dir``.

    Returns:
        tuple: ``(output_dir, dt)`` where ``output_dir`` is the value returned
            by ``make_dir(args, "results_tmp", "descriptors", overwriting)``
            and ``dt`` is the ``DfTransformer`` wrapping the descriptor
            calculator.
    """
    # Directory and log files are created before config/key are loaded;
    # this order is kept exactly as in the original implementation.
    output_dir = make_dir(args, "results_tmp", "descriptors", overwriting)
    create_log_files(output_dir)
    load_config(args)
    load_key(args)

    fingerprint_params = ConfigDict.get_parameters()["fingerprint"]
    secret_key = SecretDict.get_secrets()["key"]
    calculator = DescriptorCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=fingerprint_params,
        verbosity=0,
    )

    # Output column names and their dtypes are positionally paired.
    output_columns = ["fp_feat", "fp_val", "success", "error_message"]
    output_types = ["object", "object", "bool", "object"]
    dt = DfTransformer(
        calculator,
        input_columns={"canonical_smiles": "smiles"},
        output_columns=output_columns,
        output_types=output_types,
        success_column="success",
        nproc=args["number_cpu"],
        verbosity=0,
    )
    return output_dir, dt
コード例 #2
0
    def test_calculate_desc_multiple(self):
        """Check fingerprints for the first ten input rows against the reference CSV.

        The transformer output is written to a temporary CSV which must be
        byte-identical to ``unit_test/output/test_calculate_desc_y2.csv``.
        """
        actual_csv = curDir / "output/tmp/ecfp_feat_multiple.csv"
        smiles_frame = read_csv(
            curDir / "input/chembl/chembl_23_example_T2.csv",
            nrows=10,
        )

        calculator = DescriptorCalculator.from_param_dict(
            secret=self.keys["key"],
            method_param_dict=self.config["fingerprint"],
            verbosity=0,
        )
        transformer = DfTransformer(
            calculator,
            input_columns={"smiles": "smiles"},
            output_columns=["fp_feat", "fp_val", "success", "error_message"],
            output_types=["object", "object", "bool", "object"],
            success_column="success",
            nproc=1,
            verbosity=0,
        )

        # To regenerate the reference fingerprints, write the frame computed
        # below to "unit_test/output/test_calculate_desc_y2.csv" with
        # index=False instead of to the temporary file.
        computed = transformer.process_dataframe(smiles_frame)[0]
        computed.to_csv(actual_csv, index=False)

        # shallow=False compares file contents rather than only os.stat data.
        files_match = filecmp.cmp(
            "unit_test/output/test_calculate_desc_y2.csv",
            actual_csv,
            shallow=False,
        )
        self.assertEqual(files_match, True)
コード例 #3
0
    def test_scramble_desc_multiple_key(self):
        """Check that a different secret key yields different fingerprint output.

        Fingerprints computed with the key ``"melloddy_2"`` are written to a
        temporary CSV, which must NOT match the reference file
        ``unit_test/output/test_calculate_desc_y2.csv``.
        """
        alternate_key = "melloddy_2"

        scrambled_csv = curDir / "output/tmp/ecfp_feat_scrambled_new_key.csv"
        smiles_frame = read_csv(
            curDir / "input/chembl/chembl_23_example_T2.csv",
            nrows=10,
        )

        calculator = DescriptorCalculator.from_param_dict(
            secret=alternate_key,
            method_param_dict=self.config["fingerprint"],
            verbosity=0,
        )
        transformer = DfTransformer(
            calculator,
            input_columns={"smiles": "smiles"},
            output_columns=["fp_feat", "fp_val", "success", "error_message"],
            output_types=["object", "object", "bool", "object"],
            success_column="success",
            nproc=1,
            verbosity=0,
        )

        computed = transformer.process_dataframe(smiles_frame)[0]
        computed.to_csv(scrambled_csv, index=False)

        # Content comparison (shallow=False); the files are expected to differ.
        files_match = filecmp.cmp(
            "unit_test/output/test_calculate_desc_y2.csv",
            scrambled_csv,
            shallow=False,
        )
        self.assertEqual(files_match, False)
コード例 #4
0
def prepare(args):
    """Build the transformers used for the reference-set run.

    Loads the configuration and key referenced by ``args``, creates the
    "reference_set" directory via ``make_dir`` (always with the overwriting
    flag set to ``True``), and constructs three single-process
    ``DfTransformer`` objects: one for standardization, one for scaffold fold
    assignment and one for descriptor calculation.

    Args:
        args: argparser arguments, forwarded to ``load_config``, ``load_key``
            and ``make_dir``.

    Returns:
        tuple: ``(output_dir, dt_standardizer, dt_fold, dt_descriptor)``.
    """

    def _single_process_transformer(calculator, column_map, columns, dtypes):
        # All three transformers share nproc=1, verbosity=0 and "success"
        # as their success column; only the wrapped object and columns differ.
        return DfTransformer(
            calculator,
            input_columns=column_map,
            output_columns=columns,
            output_types=dtypes,
            success_column="success",
            nproc=1,
            verbosity=0,
        )

    load_config(args)
    load_key(args)
    # The fourth argument is the overwriting flag, fixed to True here.
    output_dir = make_dir(args, "reference_set", None, True)
    secret_key = SecretDict.get_secrets()["key"]

    # Standardization: "smiles" in, "canonical_smiles" out.
    standardizer = Standardizer.from_param_dict(
        method_param_dict=ConfigDict.get_parameters()["standardization"],
        verbosity=0,
    )
    dt_standardizer = _single_process_transformer(
        standardizer,
        {"smiles": "smiles"},
        ["canonical_smiles", "success", "error_message"],
        ["object", "bool", "object"],
    )

    # Scaffold-based fold assignment on the canonical SMILES.
    fold_assigner = ScaffoldFoldAssign.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["scaffold_folding"],
        verbosity=0,
    )
    dt_fold = _single_process_transformer(
        fold_assigner,
        {"canonical_smiles": "smiles"},
        ["murcko_smiles", "sn_smiles", "fold_id", "success", "error_message"],
        ["object", "object", "int", "bool", "object"],
    )

    # Fingerprint calculation on the canonical SMILES.
    descriptor_calculator = DescriptorCalculator.from_param_dict(
        secret=secret_key,
        method_param_dict=ConfigDict.get_parameters()["fingerprint"],
        verbosity=0,
    )
    dt_descriptor = _single_process_transformer(
        descriptor_calculator,
        {"canonical_smiles": "smiles"},
        ["fp_feat", "fp_val", "success", "error_message"],
        ["object", "object", "bool", "object"],
    )

    return output_dir, dt_standardizer, dt_fold, dt_descriptor
コード例 #5
0
    def test_calculate_desc_val_single(self):
        """Check the second element of ``calculate_single`` against the reference file.

        The value is saved with ``np.save`` to a temporary ``.npy`` file which
        must be byte-identical to
        ``unit_test/output/test_calculate_desc_val.npy``.
        """
        actual_npy = curDir / "output/tmp/ecfp_val.npy"
        calculator = DescriptorCalculator.from_param_dict(
            secret=self.keys["key"],
            method_param_dict=self.config["fingerprint"],
            verbosity=0,
        )

        smiles = "Cc1ccc(S(=O)(=O)Nc2ccc(-c3nc4cc(NS(=O)(=O)c5ccc(C)cc5)ccc4[nH]3)cc2)cc1"
        fingerprint = calculator.calculate_single(smiles)

        # To regenerate the reference, save the same value to
        # "unit_test/output/test_calculate_desc_val.npy" instead.
        np.save(actual_npy, fingerprint[1])

        # shallow=False compares file contents rather than only os.stat data.
        files_match = filecmp.cmp(
            "unit_test/output/test_calculate_desc_val.npy",
            actual_npy,
            shallow=False,
        )
        self.assertEqual(files_match, True)