Ejemplo n.º 1
0
def __drawOrigCoords(origCoords):
    """Show the pre-rotamerize coordinates as a Coot generic object

    ARGUMENTS:
        origCoords - a chain object; every nucleotide in origCoords.nucs is drawn
    RETURNS:
        objNum - the number of the generic object that was created
    EFFECTS:
        creates a generic object named "Pre-rotamerize coordinates", switches
        its display on, adds one line per bond and redraws the graphics
    """

    objNum = new_generic_object_number("Pre-rotamerize coordinates")
    set_display_generic_object(objNum, 1)

    #each entry holds the coordinates of a bond's first atom followed by the
    #coordinates of its second atom
    segments = []

    #O3' coordinates of the preceding nucleotide, or None when there is no
    #preceding nucleotide or it has no O3' atom
    lastO3 = None

    for nuc in origCoords.nucs:

        #bonds inside the nucleotide; a bond is skipped if either atom is missing
        bondPairs = BOND_LIST_FULL["backbone"] + BOND_LIST_FULL[nuc.type]
        segments.extend(nuc.atoms[first] + nuc.atoms[second]
                        for (first, second) in bondPairs
                        if nuc.hasAtom(first) and nuc.hasAtom(second))

        #bond linking the previous nucleotide's O3' to this nucleotide's P
        if lastO3 is not None and nuc.hasAtom("P"):
            segments.append(lastO3 + nuc.atoms["P"])

        #remember this nucleotide's O3' for the next iteration
        lastO3 = nuc.atoms["O3'"] if nuc.hasAtom("O3'") else None

    #nothing is sent to Coot until every bond has been collected
    for segment in segments:
        to_generic_object_add_line(objNum, ORIG_COORDS_COLOR, 6, *segment)
    graphics_draw()

    return objNum
Ejemplo n.º 2
0
def __drawOrigCoords(origCoords):
    """Draw the coordinates from before rotamerization as a generic object
    
    ARGUMENTS:
        origCoords - a chain object containing the nucleotides to be drawn
    RETURNS:
        genObj - the number of the new generic object
    EFFECTS:
        creates a Coot generic object called "Pre-rotamerize coordinates",
        turns on its display, fills it with bond lines and redraws
    """
    
    genObj = new_generic_object_number("Pre-rotamerize coordinates")
    set_display_generic_object(genObj, 1)
    
    bonds = []          #start coordinates + end coordinates of every bond
    o3Coords = None     #O3' of the nucleotide handled in the previous pass
    
    for nuc in origCoords.nucs:
        
        #intra-nucleotide bonds (backbone bonds plus the bonds for this base type)
        for (nameA, nameB) in BOND_LIST_FULL["backbone"] + BOND_LIST_FULL[nuc.type]:
            if not (nuc.hasAtom(nameA) and nuc.hasAtom(nameB)):
                #one of the two atoms is absent, so there is nothing to draw
                continue
            bonds.append(nuc.atoms[nameA] + nuc.atoms[nameB])
        
        #inter-nucleotide bond from the previous O3' to the current P
        if o3Coords is not None and nuc.hasAtom("P"):
            bonds.append(o3Coords + nuc.atoms["P"])
        
        #store the current O3' (or None if it is missing) for the next nucleotide
        if nuc.hasAtom("O3'"):
            o3Coords = nuc.atoms["O3'"]
        else:
            o3Coords = None
    
    #hand the collected bonds to Coot and refresh the display
    for bond in bonds:
        to_generic_object_add_line(genObj, ORIG_COORDS_COLOR, 6, *bond)
    graphics_draw()
    
    return genObj
Ejemplo n.º 3
0
def cluster_and_display_waters(site_number, w_positions_np):
    def optimize_n(positions_np, n_data):

        bic = {}
        for n in [x + 1 for x in range(20)]:
            if n < len(positions_np):
                gmm = mixture.GMM(n_components=n, covariance_type="spherical", n_iter=20)
                gmm.fit(positions_np)
                score = sum(gmm.score(positions_np))
                lambda_c = 15  # 3 too few
                bic_l = score - lambda_c * 0.5 * math.log(n_data) * n
                bic[n] = bic_l

        for key in bic:
            print "   water bic", key, bic[key]

        key, value = max(bic.iteritems(), key=lambda x: x[1])
        return key

    n_components = optimize_n(w_positions_np, len(w_positions_np))
    print "optimize_n for water:::::::::::::", n_components
    dpgmm = mixture.GMM(n_components, covariance_type="spherical", n_iter=40)
    dpgmm.fit(w_positions_np)

    cluster_assignments = dpgmm.predict(w_positions_np)

    color_list = [
        "green",
        "greentint",
        "sea",
        "yellow",
        "yellowtint",
        "aquamarine",
        "forestgreen",
        "goldenrod",
        "orangered",
        "orange",
        "cyan",
        "red",
        "blue",
    ]
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)
    color_list.extend(color_list)

    means = dpgmm.means_
    cvs = dpgmm._get_covars()
    weights = dpgmm.weights_

    obj = coot.new_generic_object_number("CFC Site " + str(site_number) + " selected waters")
    for i, pos in enumerate(w_positions_np):
        mean = means[cluster_assignments[i]]
        # reject spheres at the origin - (from DPGMM strangeness)
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        if d > 1.0:
            col = color_list[cluster_assignments[i]]
            coot.to_generic_object_add_point(obj, col, 10, pos[0], pos[1], pos[2])
        else:
            print "reject prediction", i, "for cluster", cluster_assignments[i]

    # set_display_generic_object(obj, 1)

    obj = coot.new_generic_object_number("CFC Site " + str(site_number) + " water cluster means")

    for i, cv in enumerate(cvs):

        mean = means[i]
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        v, w = linalg.eigh(cv)

        # print "mean  ", mean
        # print "weight", weights[i], "prec", precs[i]
        # print "weight", weights[i]
        # print "v", v

        if d > 1.0:

            pos = mean
            thick = 2
            cluster_star_obj(obj, pos, thick, v[0])

        else:
            print "reject", mean, v

    coot.set_display_generic_object(obj, 1)

    cluster_assignments_as_list = [int(x) for x in cluster_assignments]

    return (dpgmm, cluster_assignments_as_list)
Ejemplo n.º 4
0
    def find_the_sites(self, file_name_comp_id_list):

        # main line
        #
        coords_with_spec = []

        for fn_comp_id in file_name_comp_id_list:
            fn = fn_comp_id[0]
            comp_id = fn_comp_id[1]
            imol = coot.handle_read_draw_molecule_with_recentre(fn_comp_id[0], 0)
            # what are the residue specs for the given comp_ids?
            residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
            print fn, residue_specs

            for spec in residue_specs:
                # centre = residue_centre_from_spec_py(imol, spec)
                chain_id = rsu.residue_spec_to_chain_id(spec)
                res_no = rsu.residue_spec_to_res_no(spec)
                ins_code = ""

                res_info = coot.residue_info_py(imol, chain_id, res_no, ins_code)

                for atom in res_info:
                    coords_with_spec.append([rsu.residue_atom_to_position(atom), imol, spec])

        # print coords_with_spec

        # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites

        if len(coords_with_spec) < 3:

            return False

        else:

            coords = [x[0] for x in coords_with_spec]
            positions_np = np.array(coords)
            n_components = self.optimize_n(positions_np, len(positions_np))
            print "optimize_n for sites::::::::::::", n_components
            dpgmm = mixture.GMM(n_components, covariance_type="full", n_iter=40)
            dpgmm.fit(positions_np)

            cluster_assignments = dpgmm.predict(positions_np)
            means = dpgmm.means_
            weights = dpgmm.weights_

            print cluster_assignments
            print means
            print weights

            print "cluster_assignments", cluster_assignments

            merge_map = self.find_mergeable_clusters(means, weights)
            # which key (i.e. cluster index) has the most number of other clusters
            # that can be merged in?
            #
            # convert to a list of ints (not <type 'numpy.int64'>) (because, on decoding Python->C++ object
            # we do a PyInt_Check for the site_idx (and a <type 'numpy.int64'> fails that test)
            #
            new_cluster_assignments = [int(x) for x in self.merge_clusters(cluster_assignments, merge_map)]
            print "new cluster_assignments", new_cluster_assignments

            specs = [x[1:] for x in coords_with_spec]
            cluster_assignments_with_specs = zip(new_cluster_assignments, specs)

            sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(cluster_assignments_with_specs)

            # show me them
            if True:  # debug
                o = coot.new_generic_object_number("site clusters")
                for mean in means:
                    cluster_star_obj(o, mean, 2, 2)
                # coot.set_display_generic_object(o, 1) this is for debugging

            self.sites = sites
Ejemplo n.º 5
0
def cluster_and_display_chemical_features(site_number, type, chemical_features_list):
    def optimize_n(type, positions_np, n_data):

        print "cluster_and_display_chemical_features.optimize_n called " "with n_data = ", n_data

        bic = {}
        for n in [x + 1 for x in range(10)]:
            if n < n_data:
                gmm = mixture.GMM(n_components=n, covariance_type="spherical", n_iter=20)
                gmm.fit(positions_np)
                score = sum(gmm.score(positions_np))
                lambda_c = 15
                if type == "Aromatic":
                    lambda_c = 20
                bic_l = score - lambda_c * 0.5 * math.log(n_data) * n
                bic[n] = bic_l

        if len(bic) > 1:
            key, value = max(bic.iteritems(), key=lambda x: x[1])
            return key
        else:
            return 1

    def analyse_bic(type, positions_np, n_data):

        for n in [x + 1 for x in range(14)]:
            gmm = mixture.GMM(n_components=n, covariance_type="spherical", n_iter=20)
            gmm.fit(positions_np)
            score = sum(gmm.score(positions_np))
            lambda_c = 3
            if type == "Aromatic":
                lambda_c = 3000
            bic = score - lambda_c * 0.5 * n_data * n
            print type, len(positions_np), n, "converged?", gmm.converged_, "score:", score, "bic", bic

    def get_cfc_col(type):
        if type == "Donor":
            return "blue"
        if type == "Acceptor":
            return "red"
        if type == "Hydrophobe":
            return "yellow"
        if type == "Aromatic":
            return "orange"
        return "grey"

    # --- main line ----

    # no fake points
    # positions_np = np.array([item[0] for item in chemical_features_list])

    ext_chemical_features_list = [item[0] for item in chemical_features_list]

    for item_b in chemical_features_list:
        delta = 0.25
        item = item_b[0]
        p1 = [item[0], item[1], item[2] + delta]
        p2 = [item[0], item[1], item[2] - delta]
        p3 = [item[0], item[1] + delta, item[2]]
        p4 = [item[0], item[1] - delta, item[2]]
        p5 = [item[0] + delta, item[1], item[2]]
        p6 = [item[0] - delta, item[1], item[2]]
        ext_chemical_features_list.append(p1)
        ext_chemical_features_list.append(p2)
        ext_chemical_features_list.append(p3)
        ext_chemical_features_list.append(p4)
        ext_chemical_features_list.append(p5)
        ext_chemical_features_list.append(p6)

    positions_np = np.array(ext_chemical_features_list)

    # analyse_bic(type, positions_np, len(chemical_features_list))

    n_data = len(chemical_features_list)
    n = 1
    if n_data > 1:
        n = optimize_n(type, positions_np, n_data)

    if n <= len(chemical_features_list):
        gmm = mixture.GMM(n_components=n, covariance_type="spherical", n_iter=20)
        gmm.fit(positions_np)
        print type, len(positions_np), n, "converged? ", gmm.converged_, "score:", sum(gmm.score(positions_np))

        cluster_assignments = gmm.predict(positions_np)

        features = []
        for i, cf in enumerate(chemical_features_list):
            # print "     ", cf, cluster_assignments[i]
            features.append([cf, int(cluster_assignments[i])])

        means = gmm.means_
        means_as_list = [[x[0], x[1], x[2]] for x in means]

        obj_name = "CFC Site " + str(site_number) + " " + type + " pharmacophore-clusters"
        cfc_obj = coot.new_generic_object_number(obj_name)
        cfc_col = get_cfc_col(type)
        for mean in means_as_list:
            # coot.to_generic_object_add_dodecahedron(cfc_obj, cfc_col, 0.2, mean[0], mean[1], mean[2])
            coot.to_generic_object_add_pentakis_dodecahedron(cfc_obj, cfc_col, 2.3, 0.1, mean[0], mean[1], mean[2])
        coot.set_display_generic_object(cfc_obj, 1)

        return [type, features, means_as_list]

    # oops too many parameters for the model
    return False
Ejemplo n.º 6
0
def cluster_and_display_waters(site_number, w_positions_np):
    """Cluster water positions with a Gaussian mixture and draw them in Coot.

    The number of mixture components is chosen by the nested optimize_n(),
    a spherical-covariance mixture.GMM is fitted with that many components,
    and two Coot generic objects are created: one with every water position
    coloured by its cluster, and one with a star at each cluster mean.  Only
    the second object is switched on for display.

    Args:
        site_number: site identifier; only used (via str()) in the names of
            the generic objects.
        w_positions_np: indexable sequence of water positions, each with at
            least three coordinates (presumably an (N, 3) numpy array -
            confirm against the caller).

    Returns:
        A tuple (fitted mixture model, list of int cluster indices); the
        list has one entry per position in w_positions_np.

    Raises:
        ValueError: if fewer than two positions are supplied, because no
            component count can be scored in that case.
    """

    def optimize_n(positions_np, n_data):
        """Return the component count in 1..20 with the best penalised score."""

        lambda_c = 15  # 3 too few
        bic = {}
        for n in range(1, 21):
            # only models with fewer components than data points are tried
            if n < len(positions_np):
                gmm = mixture.GMM(n_components=n,
                                  covariance_type='spherical',
                                  n_iter=20)
                gmm.fit(positions_np)
                score = sum(gmm.score(positions_np))
                bic[n] = score - lambda_c * 0.5 * math.log(n_data) * n

        for key in bic:
            print("   water bic", key, bic[key])

        if not bic:
            # previously surfaced as an opaque "max() arg is an empty sequence"
            raise ValueError("cluster_and_display_waters needs at least 2 "
                             "water positions, got %d" % len(positions_np))

        # the key with the highest score (first one wins on ties)
        return max(bic, key=bic.get)

    n_components = optimize_n(w_positions_np, len(w_positions_np))
    print("optimize_n for water:::::::::::::", n_components)
    dpgmm = mixture.GMM(n_components, covariance_type='spherical', n_iter=40)
    dpgmm.fit(w_positions_np)

    cluster_assignments = dpgmm.predict(w_positions_np)

    # Colours are reused cyclically: cluster k gets colour k modulo the
    # palette size, so no oversized palette has to be built.
    color_list = [
        'green', 'greentint', "sea", 'yellow', "yellowtint", "aquamarine",
        "forestgreen", "goldenrod", "orangered", "orange", "cyan", 'red',
        "blue"
    ]
    n_colors = len(color_list)

    means = dpgmm.means_
    cvs = dpgmm._get_covars()

    # First object: the individual waters.  Its display is never switched on
    # here.
    obj = coot.new_generic_object_number("CFC Site " + str(site_number) +
                                         " selected waters")
    for i, pos in enumerate(w_positions_np):
        cluster = cluster_assignments[i]
        mean = means[cluster]
        # reject spheres at the origin - (from DPGMM strangeness)
        # d is the squared distance of the cluster mean from the origin
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        if d > 1.0:
            col = color_list[cluster % n_colors]
            coot.to_generic_object_add_point(obj, col, 10, pos[0], pos[1],
                                             pos[2])
        else:
            print("reject prediction", i, "for cluster", cluster)

    # Second object: one star per cluster mean.
    obj = coot.new_generic_object_number("CFC Site " + str(site_number) +
                                         " water cluster means")

    for i, cv in enumerate(cvs):

        mean = means[i]
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        # v is the first item returned by eigh (presumably the eigenvalues,
        # if linalg is numpy/scipy linalg - confirm); v[0] sets the star size
        v, _eigenvectors = linalg.eigh(cv)

        if d > 1.0:
            cluster_star_obj(obj, mean, 2, v[0])
        else:
            print("reject", mean, v)

    coot.set_display_generic_object(obj, 1)

    # plain Python ints rather than numpy integers
    cluster_assignments_as_list = [int(x) for x in cluster_assignments]

    return (dpgmm, cluster_assignments_as_list)
Ejemplo n.º 7
0
    def find_the_sites(self, file_name_comp_id_list):
        """Read the given models, cluster their ligand atoms into sites and store them.

        For every (file name, comp-id) entry the file is read into Coot, the
        residues matching the comp-id are looked up, and the position of each
        of their atoms is collected.  The positions are clustered with a
        full-covariance mixture.GMM, mergeable clusters are merged, and the
        per-atom site indices are handed to Coot.  The value Coot returns is
        stored in self.sites.

        Args:
            file_name_comp_id_list: sequence of entries whose element 0 is a
                coordinates file name and element 1 a comp-id.

        Returns:
            False when fewer than three atom positions were collected;
            otherwise None (the result is left in self.sites).
        """

        # main line
        #
        # each entry is [atom position, imol, residue spec]
        coords_with_spec = []

        for fn_comp_id in file_name_comp_id_list:
            fn = fn_comp_id[0]
            comp_id = fn_comp_id[1]
            imol = coot.handle_read_draw_molecule_with_recentre(fn, 0)
            # what are the residue specs for the given comp_ids?
            residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
            print(fn, residue_specs)

            for spec in residue_specs:
                chain_id = rsu.residue_spec_to_chain_id(spec)
                res_no = rsu.residue_spec_to_res_no(spec)
                # the insertion code is always passed as the empty string
                ins_code = ''

                res_info = coot.residue_info_py(imol, chain_id, res_no,
                                                ins_code)

                for atom in res_info:
                    coords_with_spec.append(
                        [rsu.residue_atom_to_position(atom), imol, spec])

        # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites

        if len(coords_with_spec) < 3:
            return False

        coords = [x[0] for x in coords_with_spec]
        positions_np = np.array(coords)
        n_components = self.optimize_n(positions_np, len(positions_np))
        print("optimize_n for sites::::::::::::", n_components)
        dpgmm = mixture.GMM(n_components,
                            covariance_type='full',
                            n_iter=40)
        dpgmm.fit(positions_np)

        cluster_assignments = dpgmm.predict(positions_np)
        means = dpgmm.means_
        weights = dpgmm.weights_

        print(cluster_assignments)
        print(means)
        print(weights)

        print("cluster_assignments", cluster_assignments)

        merge_map = self.find_mergeable_clusters(means, weights)
        # which key (i.e. cluster index) has the most number of other clusters
        # that can be merged in?
        #
        # convert to a list of ints (not numpy.int64) because, on decoding
        # the Python->C++ object, an integer type check is done for the
        # site_idx and a numpy.int64 fails that test
        #
        new_cluster_assignments = [
            int(x)
            for x in self.merge_clusters(cluster_assignments, merge_map)
        ]
        print("new cluster_assignments", new_cluster_assignments)

        specs = [x[1:] for x in coords_with_spec]
        # In Python 3 zip() is a lazy iterator, not a list.  The C++ side
        # does strict type checks (see the note above), so pass a real list
        # - presumably it checks for one; confirm against the binding.
        cluster_assignments_with_specs = list(
            zip(new_cluster_assignments, specs))

        sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(
            cluster_assignments_with_specs)

        # debugging aid: a star at every (unmerged) cluster mean; the object
        # is created but its display is not switched on
        o = coot.new_generic_object_number("site clusters")
        for mean in means:
            cluster_star_obj(o, mean, 2, 2)

        self.sites = sites
Ejemplo n.º 8
0
def cluster_and_display_chemical_features(site_number, type,
                                          chemical_features_list):
    """Cluster the chemical features of one type and display the cluster means.

    Each feature position is padded with six extra points (+/- 0.25 along
    each axis) before a spherical-covariance mixture.GMM is fitted.  The
    cluster means are drawn as pentakis dodecahedra in a new Coot generic
    object, which is switched on for display.

    Args:
        site_number: site identifier; only used (via str()) in the object name.
        type: feature type name, concatenated into the object name and used
            to pick the colour and the model-size penalty ("Donor",
            "Acceptor", "Hydrophobe" and "Aromatic" are recognised).
        chemical_features_list: sequence of features; element 0 of each is a
            position with at least three coordinates.

    Returns:
        [type, features, means_as_list], where features is a list of
        [feature, int cluster index] pairs and means_as_list holds each
        cluster mean as [x, y, z].  False is returned when the chosen
        component count exceeds the number of features, which, given how the
        count is chosen, happens when the list is empty.
    """

    def optimize_n(type, positions_np, n_data):
        """Return the component count in 1..10 with the best penalised score."""

        print("cluster_and_display_chemical_features.optimize_n called "
              "with n_data = ", n_data)

        penalty = 20 if type == 'Aromatic' else 15
        scores = {}
        for n in range(1, 11):
            # only models with fewer components than features are tried
            if n >= n_data:
                continue
            gmm = mixture.GMM(n_components=n,
                              covariance_type='spherical',
                              n_iter=20)
            gmm.fit(positions_np)
            total = sum(gmm.score(positions_np))
            scores[n] = total - penalty * 0.5 * math.log(n_data) * n

        # with zero or one candidate there is nothing to choose between
        if len(scores) <= 1:
            return 1
        best = max(scores.items(), key=lambda entry: entry[1])
        return best[0]

    def analyse_bic(type, positions_np, n_data):
        """Debugging aid: print score and penalised score for 1..14 components.

        Note that the penalty here is proportional to n_data itself, not to
        its logarithm as in optimize_n.
        """

        penalty = 3000 if type == 'Aromatic' else 3
        for n in range(1, 15):
            gmm = mixture.GMM(n_components=n,
                              covariance_type='spherical',
                              n_iter=20)
            gmm.fit(positions_np)
            score = sum(gmm.score(positions_np))
            bic = score - penalty * 0.5 * n_data * n
            print(type, len(positions_np), n, "converged?", gmm.converged_,
                  "score:", score, "bic", bic)

    def get_cfc_col(type):
        """Return the display colour for a feature type ("grey" if unknown)."""
        colours = {
            "Donor": "blue",
            "Acceptor": "red",
            "Hydrophobe": "yellow",
            "Aromatic": "orange",
        }
        return colours.get(type, "grey")

    # --- main line ----

    # The real positions come first, so index i of the padded list (and of
    # the cluster assignments below) still refers to feature i.
    padded_positions = [feature[0] for feature in chemical_features_list]

    # add six fake points around every real one, in the order
    # z+, z-, y+, y-, x+, x-
    delta = 0.25
    for feature in chemical_features_list:
        centre = feature[0]
        for axis in (2, 1, 0):
            for shift in (delta, -delta):
                moved = [centre[0], centre[1], centre[2]]
                moved[axis] = centre[axis] + shift
                padded_positions.append(moved)

    positions_np = np.array(padded_positions)

    # analyse_bic(type, positions_np, len(chemical_features_list))

    n_data = len(chemical_features_list)
    n = optimize_n(type, positions_np, n_data) if n_data > 1 else 1

    if n > n_data:
        # oops too many parameters for the model
        return False

    gmm = mixture.GMM(n_components=n,
                      covariance_type='spherical',
                      n_iter=20)
    gmm.fit(positions_np)
    print(type, len(positions_np), n, "converged? ", gmm.converged_,
          "score:", sum(gmm.score(positions_np)))

    cluster_assignments = gmm.predict(positions_np)

    # pair every real feature with the (plain int) index of its cluster
    features = [[cf, int(cluster_assignments[i])]
                for i, cf in enumerate(chemical_features_list)]

    means_as_list = [[m[0], m[1], m[2]] for m in gmm.means_]

    obj_name = ("CFC Site " + str(site_number) + " " + type +
                " pharmacophore-clusters")
    cfc_obj = coot.new_generic_object_number(obj_name)
    cfc_col = get_cfc_col(type)
    for mean in means_as_list:
        coot.to_generic_object_add_pentakis_dodecahedron(
            cfc_obj, cfc_col, 2.3, 0.1, mean[0], mean[1], mean[2])
    coot.set_display_generic_object(cfc_obj, 1)

    return [type, features, means_as_list]