def do_convergence_test(trajectory, traj_type):
    path, file_name = os.path.split(trajectory["path"])
    traj_id = file_name.split(".")[0]
    base_path = os.path.join("convergence",traj_type,"%s"%traj_id)

    for n in range(1000,10000,1000):
        print "- Working with %s with %d frames"%(trajectory["path"],n)
        this_path = os.path.join(base_path, "%d"%n)
        create_dir(this_path)
        pdb_path = os.path.join(this_path,"%d.pdb"%n)
        extract_first_n_frames(n, trajectory["path"], pdb_path)
        script = copy.deepcopy(script_template)
        script["global"]["workspace"]["base"] = this_path
        script["data"]["files"] = [pdb_path]
#         script["clustering"]["evaluation"]["maximum_noise"] = trajectory["noise"]
#         script["clustering"]["evaluation"]["minimum_cluster_size"] = int(n/trajectory["max"])
#         script["clustering"]["evaluation"]["minimum_clusters"] = trajectory["min"]
#         script["clustering"]["evaluation"]["maximum_clusters"] = trajectory["max"]
        script_path = os.path.join(this_path,"script.json")
        save_dic_in_json(script, script_path)
        os.system("python %s %s "%(PYPROCT, script_path))
        os.system("rm %s"%pdb_path)
#         working_dir = os.path.join("comparisons","profasi","%svs%s"%(A_traj_id,B_traj_id))
#         create_dir(working_dir)
#         script["global"]["workspace"]["base"] = working_dir
#         script["data"]["files"] = [A_traj, B_traj]
#         script_path = os.path.join(working_dir,"script.json")
#         save_dic_in_json(script, script_path)
#         os.system("python %s %s "%(PYPROCT, script_path))
#         initial_j_offset = 0
#     initial_i_offset = 0

# Campari vs Profasi: run a pyProCT comparison for matching trajectory pairs.
# Only index 15 is processed here; restore the commented range to run them all.
for i in [15]:  # range(0, len(profasi_trajs))
    A_traj = campari_trajs[i]
    # Trajectory id = file name without extension. Using os.path.basename
    # avoids shadowing the `file` builtin as the original code did.
    A_traj_id = os.path.basename(A_traj).split(".")[0]

    B_traj = profasi_trajs[i]
    B_traj_id = os.path.basename(B_traj).split(".")[0]

    # Fresh deep copy so this run's settings do not leak into the template.
    script = copy.deepcopy(script_template)
    working_dir = os.path.join("comparisons", "campari_vs_profasi", "%svs%s"%(A_traj_id, B_traj_id))
    create_dir(working_dir)
    script["global"]["workspace"]["base"] = working_dir
    # Residues 3..53 selected for both ensembles — presumably the common
    # region of the two models; confirm against the force fields used.
    script["data"]["files"] = [{"file":A_traj,"base_selection":"resnum 3to53"},{"file": B_traj,"base_selection":"resnum 3to53"}]
    script_path = os.path.join(working_dir,"script.json")
    save_dic_in_json(script, script_path)
    os.system("python %s %s "%(PYPROCT, script_path))


# Ejemplo n.º 3
# (The two lines above/below were extraction artifacts — an example separator
# and a stray "0" from the original dump; kept here as comments so the file
# no longer contains bare non-code tokens.)
    # NOTE(review): this is the interior of a function whose `def` line lies
    # outside this chunk — `options`, `RESULTS_PATH`, `METRICS_FILE` and
    # `FILTERED_PDB_FILE` are defined elsewhere. Code left byte-identical.
    # Load the metrics table and transpose so each row is one metric series.
    metrics = numpy.loadtxt(METRICS_FILE).T

    #--------------------------------  
    # Prepare the clustering for this guy
    #--------------------------------
    ## Load template and modify its contents for this case
    CLUSTERING_PATH = os.path.join(RESULTS_PATH,"%s_%s_clustering"%(options.drug, options.protein))
    MAX_CLUSTERS = 10
    SCRIPT_PATH = os.path.join(RESULTS_PATH,"clustering.json")
    OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")
    script = load_dic_in_json(options.template)
    script["global"]["workspace"]["base"] = CLUSTERING_PATH
    script["data"]["files"].append(FILTERED_PDB_FILE)
    script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
    save_dic_in_json(script, SCRIPT_PATH)
    # Run pyProCT as a module, capturing stdout into OUT_FILE.
    os.system("python -m pyproct.main %s > %s"%(SCRIPT_PATH, OUT_FILE))
    best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])

    #--------------------------------
    # Now calculate the values
    #--------------------------------
    # Per-cluster statistics keyed by cluster id. Row 1 of `metrics` is
    # treated as energies and row 0 as distances — presumably matching the
    # column layout of METRICS_FILE; confirm against its producer.
    results = {}
    for cluster in best_clustering.clusters:
        energies = metrics[1][cluster.all_elements]
        distances = metrics[0][cluster.all_elements]
        results[cluster.id] = {}
        results[cluster.id]["max_energy"] = numpy.max(energies)
        results[cluster.id]["min_energy"] = numpy.min(energies)
        results[cluster.id]["mean_energy"] = numpy.mean(energies)
        results[cluster.id]["mean_distance"] = numpy.mean(distances)
    # NOTE(review): interior fragment of an unseen function — `matrix_data`,
    # `base_dir`, `traj_pdb` and the *_SPAWN_* constants are defined outside
    # this chunk, and the final `for` loop is cut off here. Code left
    # byte-identical; comments only.
    # Persist the condensed distance matrix so pyProCT can reuse it instead
    # of recomputing pairwise distances.
    m_handler = MatrixHandler()
    m_handler.distance_matrix = CondensedMatrix(matrix_data)
    matrix_file = os.path.join(base_dir, TENERGY_SPAWN_MATRIX)
    m_handler.saveMatrix(matrix_file)

    #######################################################################################################################
    # Cluster by metrics
    #######################################################################################################################
    print "* Spawning - totalE clustering"
    # Load the clustering script template, point it at this workspace and at
    # the precomputed matrix, then write it back in place before running.
    be_rmsd_clustering_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_SPAWN_TOTE_SCRIPT)
    working_directory = os.path.join(base_dir, TOTALE_SPAWN_WORKSPACE)
    params = load_dic_in_json(be_rmsd_clustering_script_path)
    params['global']['workspace']['base'] = working_directory
    params['data']['files'] = [os.path.join(os.getcwd(), traj_pdb)]
    params['data']['matrix']['parameters']['path'] = matrix_file
    # NOTE(review): this overwrites the script template on disk rather than a
    # per-run copy — confirm that is intentional.
    save_dic_in_json(params, be_rmsd_clustering_script_path)
    use_pyproct(working_directory, be_rmsd_clustering_script_path)

    #######################################################################################################################
    # Get 5 representatives. 2 strategies.
    #######################################################################################################################

    #####################################################################
    #####################################################################
    # Work only with best clustering/ find best cluster
    #####################################################################
    #####################################################################
    # Rehydrate the best clustering found in the workspace and score its
    # clusters (loop body continues beyond this chunk).
    best_clustering = get_best_clustering(working_directory)
    best_clustering = Clustering.from_dic(best_clustering["clustering"])
    scores = []
    for a_cluster in best_clustering.clusters: