Example #1
0
def get_best_clustering(results_file):
    """
    Loads and returns the best clustering from a results file.
    """
    results = convert_to_utf8(json.loads(open(results_file).read()))
    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    return Clustering.from_dic(best_clustering_dic["clustering"])
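For orientation, a minimal sketch of the results-file layout this helper assumes; the key names are taken from the lookups above, while the identifier and the inner dictionary are hypothetical:
# Hypothetical layout of results.json as read by get_best_clustering (sketch only).
example_results = {
    "best_clustering": "clustering_0001",      # id of the chosen clustering (made-up value)
    "selected": {
        "clustering_0001": {
            "clustering": {}                   # dictionary accepted by Clustering.from_dic
        }
    }
}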
Example #2
0
def get_best_clustering(results_file):
    """
    Loads and returns the best clustering from a results file.
    """
    results = convert_to_utf8(json.loads(open(results_file).read()))
    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    return Clustering.from_dic(best_clustering_dic["clustering"])
Example #3
0
 def load_clustering(self, parameters):
     best_clustering = {
         "clustering":
         Clustering.from_dic(
             parameters["clustering"]["generation"]["parameters"])
     }
     return ("loaded_clustering", {
         "loaded_clustering": best_clustering
     }, {}, None)
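A minimal, hypothetical call sketch for this handler; the nested keys mirror the lookup above, and the inner dictionary stands in for whatever Clustering.from_dic expects:
# Sketch only: parameter layout inferred from the lookup in load_clustering above.
parameters = {
    "clustering": {
        "generation": {
            "method": "load",
            "parameters": {}   # a clustering dictionary previously written by pyProCT
        }
    }
}
# tag, data, files, message = handler.load_clustering(parameters)   # 'handler' is hypothetical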
Example #4
0
 def testSilhouetteSpecialCase(self):
     clustering = Clustering.from_dic(data.clustering_01)
     mh = MatrixHandler({
         "method": "load",
         "parameters": {
             "path": "data/example_clustering_1_matrix"
         }
     })
     s = SilhouetteCoefficientCalculator()
     matrix = mh.create_matrix(None)
     print s.evaluate(clustering, matrix)
Example #5
0
    def get_best_clustering(self, parameters):
        best_clustering = None
        ##############################
        # Do the actual clustering
        ##############################
        clustering_results = None

        ##############################
        # Load the clustering
        ##############################
        if parameters["clustering"]["generation"]["method"] == "load":
            best_clustering = {"clustering": Clustering.from_dic(parameters["clustering"]["generation"])}

        ##############################
        # Or generate it
        ##############################
        elif parameters["clustering"]["generation"]["method"] == "generate":
            clustering_results = ClusteringProtocol(self.timer, self.observer).run(parameters, self.matrixHandler,
                                                                                                self.workspaceHandler,
                                                                                                self.trajectoryHandler)
            best_clustering = None
            abort = False

            if clustering_results is not None:
                best_clustering_id, selected, not_selected, scores = clustering_results  # @UnusedVariable

                #################################
                # Abort if no clusters were found
                #################################
                if best_clustering_id is None:
                    abort = True
                else:
                    best_clustering = selected[best_clustering_id]
            else:
                abort = True

            if abort:
                self.notify("SHUTDOWN", "Improductive clustering search. Relax evaluation constraints.")
                print "[FATAL Driver:get_best_clustering] Improductive clustering search. Exiting..."
                exit()

        return best_clustering, clustering_results
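The branch taken depends on parameters["clustering"]["generation"]["method"]; a sketch of the two shapes, with field names inferred from the code above and everything else hypothetical:
# Sketch only: the two generation modes handled by get_best_clustering above.
load_parameters = {
    "clustering": {"generation": {"method": "load"}}       # clustering dict is read in place
}
generate_parameters = {
    "clustering": {"generation": {"method": "generate"}}   # delegates to ClusteringProtocol.run(...)
}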
Example #6
0
 def load_clustering(self, parameters):
     best_clustering = {"clustering": Clustering.from_dic(parameters["clustering"]["generation"]["parameters"])}
     return ("loaded_clustering", {"loaded_clustering": best_clustering}, {}, None)
Example #7
0
            "minimum_clusters"], parameters["clustering"]["evaluation"][
                "maximum_clusters"]
        if dataset_name in data.criteria:
            parameters["clustering"]["evaluation"][
                "evaluation_criteria"] = data.criteria[dataset_name]
        else:
            parameters["clustering"]["evaluation"][
                "evaluation_criteria"] = data.criteria["default"]
        Driver(Observer()).run(parameters)

    for dataset_name in ['concentric_circles']:  #data.all_datasets:
        results_file = os.path.join(os.path.abspath("./tmp/%s" % dataset_name),
                                    "results/results.json")
        results = convert_to_utf8(json.loads(open(results_file).read()))
        best = results["best_clustering"]
        clustering = Clustering.from_dic(
            results["selected"][best]["clustering"])
        vtools.show_2D_dataset_clusters(
            all_observations[dataset_name], clustering, scale=20,
            margin=20).save("clustering_images/%s.jpg" % dataset_name, "JPEG")
        print dataset_name, results["selected"][best]["type"], results[
            "selected"][best]["clustering"]["number_of_clusters"], results[
                "selected"][best]["evaluation"][
                    "Noise level"],  #results["selected"][best]["parameters"]
        # look for the best criteria
        criteria_scores = []
        for criteria in results["scores"]:
            criteria_scores.append(
                (results["scores"][criteria][best], criteria))
        print criteria_scores

    print "\nDone"
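If only the single best-scoring criterion is of interest, the (score, criterion) tuples built above sort naturally; a small sketch with made-up values, assuming higher scores are better:
# Illustrative sketch, not part of the original script.
criteria_scores = [(0.41, "criteria_0"), (0.77, "criteria_1")]   # hypothetical values
best_score, best_criterion = max(criteria_scores)
print best_criterion, best_score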
Example #8
0
        parameters = ProtocolParameters.get_params_from_json(script_str)
        # And change another hypothesis stuff
        parameters["clustering"]["evaluation"]["maximum_noise"] = data.noise[dataset_name]
        parameters["clustering"]["evaluation"]["minimum_cluster_size"] = data.minsize[dataset_name]
        parameters["clustering"]["evaluation"]["minimum_clusters"] = data.num_cluster_ranges[dataset_name][0]
        parameters["clustering"]["evaluation"]["maximum_clusters"] = data.num_cluster_ranges[dataset_name][1]
        print parameters["clustering"]["evaluation"]["minimum_clusters"], parameters["clustering"]["evaluation"]["maximum_clusters"]
        if dataset_name in data.criteria:
            parameters["clustering"]["evaluation"]["evaluation_criteria"] = data.criteria[dataset_name]
        else:
            parameters["clustering"]["evaluation"]["evaluation_criteria"] = data.criteria["default"]
        Driver(Observer()).run(parameters)

    for dataset_name in ['concentric_circles']: #data.all_datasets:
        results_file = os.path.join(os.path.abspath("./tmp/%s"%dataset_name),"results/results.json")
        results = convert_to_utf8(json.loads(open(results_file).read()))
        best = results["best_clustering"]
        clustering = Clustering.from_dic(results["selected"][best]["clustering"])
        vtools.show_2D_dataset_clusters(all_observations[dataset_name],
                                        clustering,
                                        scale = 20,
                                        margin = 20).save("clustering_images/%s.jpg"%dataset_name,
                                                 "JPEG")
        print dataset_name,results["selected"][best]["type"],results["selected"][best]["clustering"]["number_of_clusters"], results["selected"][best]["evaluation"]["Noise level"],#results["selected"][best]["parameters"]
        # look for the best criteria
        criteria_scores = []
        for criteria in results["scores"]:
            criteria_scores.append((results["scores"][criteria][best],criteria))
        print criteria_scores

    print "\nDone"
Example #9
0
        data[str(i)] = []
        for j in range(0, N):
            if (i, j) in cluster.percents:
                data[str(i)].append(cluster.percents[(i, j)])
            else:
                data[str(i)].append(0)

    return data


if __name__ == '__main__':
    results = convert_to_utf8(json.loads(open(sys.argv[1]).read()))
    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    num_clusters = best_clustering_dic["clustering"]["number_of_clusters"]
    clustering = Clustering.from_dic(best_clustering_dic["clustering"])
    file_frames = int(sys.argv[2])

    # generate a map element -> interpolation
    index_to_interpolation = {}
    acc = 0
    for i in range(0, file_frames - 1):
        for j in range(i + 1, file_frames):
            for k in range(20):
                index_to_interpolation[acc] = (i, j)
                acc += 1

    for cluster in clustering.clusters:
        colors = iter(cm.rainbow(np.linspace(0, 1, N)))
        theta = radar_factory(N, frame='polygon')
Example #10
0
    fig = plt.figure()
    ax = fig.gca(projection='3d')
    # Plot protein
    pdb = prody.parsePDB(params["data"]["files"][0])
    if options.show_protein:
        pdb_backbone = pdb.select("name CA").getCoordsets()[0] # "backbone not hetero"
        ax.plot(pdb_backbone.T[0], pdb_backbone.T[1], pdb_backbone.T[2])

    # Get geometric centers and plot ligands
    ligand_coords = pdb.select(params["data"]["matrix"]["parameters"]["body_selection"]).getCoordsets()

    # Get clustering
    if options.clustering_to_see is None:
        options.clustering_to_see = results["best_clustering"]
    try:
        clustering = Clustering.from_dic(results["selected"][options.clustering_to_see]["clustering"])
        # Print some info
        print_cluster_info("selected", options.clustering_to_see, results)
    except KeyError:
        clustering = Clustering.from_dic(results["not_selected"][options.clustering_to_see]["clustering"])
        # Print some info
        print_cluster_info("not_selected", options.clustering_to_see, results)

    # Show all clusters
    colors = iter(cm.rainbow(numpy.linspace(0, 1, len(clustering.clusters))))
    for cluster in clustering.clusters:
        centers = []
        for i,element in enumerate(cluster.all_elements):
            if options.stride is None or i%options.stride == 0:
                coords = ligand_coords[element]
                centers.append(coords.mean(0))
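The snippet stops after collecting the per-element geometric centers; a plausible continuation (a sketch, not the original code) would scatter each cluster's centers in its own colour:
        # Sketch of a possible continuation (not from the original source):
        centers = numpy.array(centers)
        if len(centers) > 0:
            ax.scatter(centers[:, 0], centers[:, 1], centers[:, 2],
                       color=next(colors), marker="o", s=10)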
Example #11
0
 
 #--------------------------------  
 # Prepare the clustering for this guy
 #--------------------------------
 ## Load template and modify its contents for this case
 CLUSTERING_PATH = os.path.join(RESULTS_PATH,"%s_%s_clustering"%(options.drug, options.protein))
 MAX_CLUSTERS = 10
 SCRIPT_PATH = os.path.join(RESULTS_PATH,"clustering.json")
 OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")
 script = load_dic_in_json(options.template)
 script["global"]["workspace"]["base"] = CLUSTERING_PATH
 script["data"]["files"].append(FILTERED_PDB_FILE)
 script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
 save_dic_in_json(script, SCRIPT_PATH)
 os.system("python -m pyproct.main %s > %s"%(SCRIPT_PATH, OUT_FILE))
 best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])
  
 #--------------------------------
 # Now calculate the values
 #--------------------------------
 results = {}
 for cluster in best_clustering.clusters:
     energies = metrics[1][cluster.all_elements]
     distances = metrics[0][cluster.all_elements]
     results[cluster.id] = {}
     results[cluster.id]["max_energy"] = numpy.max(energies)
     results[cluster.id]["min_energy"] = numpy.min(energies)
     results[cluster.id]["mean_energy"] = numpy.mean(energies)
     results[cluster.id]["mean_distance"] = numpy.mean(distances)
     results[cluster.id]["population"] = len(cluster.all_elements)
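The per-cluster summary above is a plain dictionary, so it can be written next to the other outputs; a sketch that assumes the RESULTS_PATH and results variables from the snippet and a hypothetical file name:
# Sketch only: persist the per-cluster summary (file name is hypothetical).
# default=float turns the numpy scalars stored above into plain JSON numbers.
import json
SUMMARY_PATH = os.path.join(RESULTS_PATH, "cluster_summary.json")
with open(SUMMARY_PATH, "w") as handle:
    json.dump(results, handle, indent=4, default=float)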
      
Example #12
0
        for j in range(0,N):
            if (i,j) in cluster.percents:
                data[str(i)].append( cluster.percents[(i,j)])
            else:
                data[str(i)].append(0)

    return data



if __name__ == '__main__':
    results = convert_to_utf8(json.loads(open(sys.argv[1]).read()))
    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    num_clusters = best_clustering_dic["clustering"]["number_of_clusters"]
    clustering = Clustering.from_dic(best_clustering_dic["clustering"])
    file_frames = int(sys.argv[2])

    # generate a map element -> interpolation
    index_to_interpolation = {}
    acc = 0
    for i in range(0, file_frames-1):
        for j in range(i+1, file_frames):
            for k in range(20):
                index_to_interpolation[acc] = (i,j)
                acc += 1


    for cluster in clustering.clusters:
        colors = iter(cm.rainbow(np.linspace(0, 1, N)))
        theta = radar_factory(N, frame='polygon')
Example #13
0
#
# RCD_script = copy.deepcopy(template_script)
# RCD_script["global"]["workspace"]["base"] = os.path.join("RDCvsRMSD", "campari", "RDC", "clustering")
# RCD_script["data"]["matrix"]["method"] = "load"
# RCD_script["data"]["matrix"]["parameters"]["path"] = os.path.join("RDCvsRMSD", "campari", "RDC", "matrix")
# RCD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# tools.save_dic_in_json(RCD_script, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json"))
# tools.save_dic_in_json(RMSD_script, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json"))
#
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json")))
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json")))


results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RDC_refined", "clustering","results","results.json"))
RDC_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RMSD_refined", "clustering","results","results.json"))
RMSD_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_bad_score = Clustering.from_dic(results["selected"]["clustering_0098"]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_medium_score = Clustering.from_dic(results["selected"]["clustering_0056"]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_fairly_good_score = Clustering.from_dic(results["selected"]["clustering_0212"]["clustering"]).gen_class_list(number_of_elements = 5926)
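Since gen_class_list yields one label per frame, the clusterings above can be compared directly; a sketch using scikit-learn's adjusted Rand index (the comparison itself is an assumption, not something the original script is shown doing):
# Sketch only: compare two per-frame label lists; requires scikit-learn.
from sklearn.metrics import adjusted_rand_score

print adjusted_rand_score(RDC_clustering, RMSD_clustering)
print adjusted_rand_score(RDC_clustering, Dihedral_clustering)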
Example #14
0
    # Plot protein
    pdb = prody.parsePDB(params["data"]["files"][0])
    if options.show_protein:
        pdb_backbone = pdb.select("name CA").getCoordsets()[
            0]  # "backbone not hetero"
        ax.plot(pdb_backbone.T[0], pdb_backbone.T[1], pdb_backbone.T[2])

    # Get geometric centers and plot ligands
    ligand_coords = pdb.select(params["data"]["matrix"]["parameters"]
                               ["body_selection"]).getCoordsets()

    # Get clustering
    if options.clustering_to_see is None:
        options.clustering_to_see = results["best_clustering"]
    try:
        clustering = Clustering.from_dic(
            results["selected"][options.clustering_to_see]["clustering"])
        # Print some info
        print_cluster_info("selected", options.clustering_to_see, results)
    except KeyError:
        clustering = Clustering.from_dic(
            results["not_selected"][options.clustering_to_see]["clustering"])
        # Print some info
        print_cluster_info("not_selected", options.clustering_to_see, results)

    # Show all clusters
    colors = iter(cm.rainbow(numpy.linspace(0, 1, len(clustering.clusters))))
    for cluster in clustering.clusters:
        centers = []
        for i, element in enumerate(cluster.all_elements):
            if options.stride is None or i % options.stride == 0:
                coords = ligand_coords[element]