コード例 #1
0
def fit_protein_pca(yaml_file):
    """Incrementally fit one PCA model over every protein and save it.

    A single ``PCA`` instance is built from the ``pca__``-prefixed entries
    of ``yaml_file["mdl_params"]`` and updated with ``partial_fit`` on each
    featurized trajectory file (``*.jl``) found for each protein. The
    fitted model is then written to ``<mdl_dir>/pca_mdl.pkl``.

    Parameters
    ----------
    yaml_file : mapping
        Already-parsed configuration. The keys read here are ``"mdl_dir"``,
        ``"mdl_params"``, ``"protein_list"`` and ``"feature_dir"``; it is
        also passed through to ``enter_protein_data_dir``.

    Returns
    -------
    None
        The result is the model file written to disk.

    Notes
    -----
    Fitting is best effort: a trajectory whose ``partial_fit`` call raises
    is reported on stdout and skipped so the remaining ones are still used.
    Errors while loading a file are not suppressed.
    """
    mdl_dir = yaml_file["mdl_dir"]
    mdl_params = yaml_file["mdl_params"]

    # Keep only the "pca__"-prefixed parameters and strip the prefix by
    # slicing, so a key containing "pca__" a second time is not truncated.
    prefix = "pca__"
    current_mdl_params = {
        key[len(prefix):]: value
        for key, value in mdl_params.items()
        if key.startswith(prefix)
    }

    protein_pca_mdl = PCA(**current_mdl_params)

    for protein in yaml_file["protein_list"]:
        print("Fitting to protein %s" % protein)
        # NOTE(review): presumably this context manager switches into the
        # protein's data directory, which is why the glob below is relative
        # -- confirm against its definition.
        with enter_protein_data_dir(yaml_file, protein):
            featurized_traj = sorted(
                glob.glob("./%s/*.jl" % yaml_file["feature_dir"]),
                key=keynat)
            for f in featurized_traj:
                featurized_path = verboseload(f)
                try:
                    protein_pca_mdl.partial_fit(featurized_path)
                except Exception as err:
                    # Stay best effort, but say what was dropped; do not
                    # trap KeyboardInterrupt/SystemExit as a bare except did.
                    print("Skipping %s for protein %s: partial_fit failed (%s)"
                          % (f, protein, err))
            print("Done partial fitting to protein %s" % protein)

    # Persist the fitted model next to the other model artifacts.
    pca_mdl_path = os.path.join(mdl_dir, "pca_mdl.pkl")
    verbosedump(protein_pca_mdl, pca_mdl_path)