Example #1
0
sys.path.append('/home/people/tc/svn/tc_sandbox/pdb/')
import biounit,parse_pdb
sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
import combinatorics

## root directory of the local PDB mirror
path_pdb = '/data/pdb-v3.2'

## van der Waals radius per element symbol (presumably Angstrom -- confirm)
d_radii_vdw = dict(H=1.20, C=1.70, N=1.55, O=1.52, S=1.80)

## offsets produced by combinatorics.permutation_w_rep for the values -1/0/1
## and length 3; the null offset [0,0,0] is taken out again
l_translations = combinatorics.permutation_w_rep([-1, 0, 1], 3)
l_translations.remove([0, 0, 0])

## upper case followed by lower case letters (digits currently not included)
s_alphabet = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
)  # 0123456789

## Cutoff for "NEAR" contacts (Angstrom): the value following "-sympar" on the
## command line when that flag is present, otherwise 5.
sympar = (
    float(sys.argv[sys.argv.index('-sympar') + 1])
    if '-sympar' in sys.argv
    else 5.
)

def main(pdb):
    """Fetch the coordinate file for ``pdb`` into the working directory.

    Only the beginning of this function is visible in this excerpt.  The
    visible part checks whether ``<pdb>.pdb`` exists and, if it does not,
    copies ``<path_pdb>/<pdb[1:3]>/pdb<pdb>.ent`` to ``<pdb>.pdb`` by running
    ``cp`` through the shell.

    pdb -- identifier string; characters 1-2 select the sub-directory below
           ``path_pdb`` (presumably a 4-character PDB ID -- confirm).
    """

    ## the return status of os.system is not checked, so a failed copy goes unnoticed here
    ## NOTE(review): pdb is interpolated into the shell command unquoted --
    ## confirm it never comes from untrusted input
    if not os.path.isfile('%s.pdb' %(pdb)):
        os.system('cp %s/%s/pdb%s.ent %s.pdb' %(path_pdb,pdb[1:3],pdb,pdb,))
Example #2
0
def add_probe_atoms(
    d_coords,
    d_dimensions,
    dist_min_sq,
    dist_max_sq,
    grid_step=None,
    shell_width=None,
    atom_names=None,
):
    """Return the grid points that qualify as possible pocket probe positions.

    A box is spanned around the molecule (``d_dimensions`` widened by
    ``shell_width`` on every side) and sampled every ``grid_step`` units.
    A grid point is kept when

    * no non-hydrogen atom lies closer than ``sqrt(dist_min_sq)``, and
    * at least two 'CA' atoms that are listed in ``atom_names`` and lie
      closer than ``sqrt(dist_max_sq)`` subtend an angle larger than
      90 degrees at the grid point (negative dot product).

    Parameters:
    d_coords     -- dict: atom number -> {'atom_name': str, 'coord': coord};
                    ``coord`` is indexed with 0..2 and must support
                    ``coord - numpy.ndarray``.
    d_dimensions -- dict: 'x'/'y'/'z' -> {'min': number, 'max': number}.
    dist_min_sq  -- squared distance below which a point counts as protein.
    dist_max_sq  -- squared distance below which an atom counts as nearby.
    grid_step    -- grid spacing (int); defaults to the module global ``grid``.
    shell_width  -- margin added around the box (int); defaults to the module
                    global ``shell``.
    atom_names   -- atom names taken into account for the "nearby" test;
                    defaults to the module global ``l_atoms``.

    Returns a list of ``[x, y, z]`` integer grid coordinates in x, y, z
    iteration order.  The current x slice is printed as progress output.

    The neighbour-count diagnostic that sat behind a hard-wired
    ``bool_test = False`` could never run and is not part of this function.
    """

    ## fall back on the module-level settings when not given explicitly
    if grid_step is None:
        grid_step = grid
    if shell_width is None:
        shell_width = shell
    if atom_names is None:
        atom_names = l_atoms

    ## hydrogen atoms take part in neither distance test, so they are
    ## filtered out once instead of once per grid point
    l_heavy_atoms = []
    for atom_no in sorted(d_coords):
        d_atom = d_coords[atom_no]
        atom_name = d_atom['atom_name']
        if atom_name[0] == 'H':
            continue
        l_heavy_atoms.append((atom_name, d_atom['coord']))

    x_min = int(d_dimensions['x']['min']) - shell_width
    x_max = int(d_dimensions['x']['max']) + shell_width + 1
    y_min = int(d_dimensions['y']['min']) - shell_width
    y_max = int(d_dimensions['y']['max']) + shell_width + 1
    z_min = int(d_dimensions['z']['min']) - shell_width
    z_max = int(d_dimensions['z']['max']) + shell_width + 1

    l_coords_probe = []
    for x in range(x_min, x_max, grid_step):
        ## progress output, one line per x slice
        print('%s %s %s' % (x, x_min, x_max))
        for y in range(y_min, y_max, grid_step):
            for z in range(z_min, z_max, grid_step):
                if _is_possible_pocket(
                        (x, y, z), l_heavy_atoms, atom_names,
                        dist_min_sq, dist_max_sq):
                    l_coords_probe.append([x, y, z])

    return l_coords_probe


def _is_possible_pocket(
        point, l_heavy_atoms, atom_names, dist_min_sq, dist_max_sq):
    """Tell whether one grid point is a possible pocket position.

    point         -- (x, y, z) of the grid point.
    l_heavy_atoms -- list of (atom_name, coord) for the non-hydrogen atoms.
    The remaining arguments are those of add_probe_atoms.
    """

    x, y, z = point

    l_coords_ca = []
    for atom_name, coord in l_heavy_atoms:
        dist_sq = ((x - coord[0])**2 +
                   (y - coord[1])**2 +
                   (z - coord[2])**2)
        ## too close to any heavy atom: the point is inside the protein
        if dist_sq < dist_min_sq:
            return False
        ## only alpha carbon atoms are used for the angle criterion
        if (atom_name == 'CA' and atom_name in atom_names
                and dist_sq < dist_max_sq):
            l_coords_ca.append(coord)

    ## vectors from the grid point to every nearby alpha carbon
    probe = numpy.array([x, y, z])
    l_vectors = [coord - probe for coord in l_coords_ca]

    ## a negative dot product means the two atoms are seen more than
    ## 90 degrees apart, i.e. protein on opposite sides of the point
    for i, v1 in enumerate(l_vectors):
        for v2 in l_vectors[i + 1:]:
            if numpy.dot(v1, v2) < 0:
                return True

    return False
def main():
    """Write residues of translated copies of 2lzt that are near the parent.

    For every key of ``d_290`` and every non-zero offset, each residue is
    shifted by ``offset * cryst1``.  A residue counts as vicinal when one of
    its shifted atoms has a squared distance below 225 (15 length units) to
    any coordinate in ``l_coordinates``.  Vicinal residues, together with
    their sequence neighbours, are handed to ``newline`` and the collected
    lines are written to ``2lzt_contacts.pdb``.
    """

    pdb = "2lzt"

    import Numeric
    import sys

    sys.path.append("/home/people/tc/svn/tc_sandbox/misc/")
    import combinatorics

    l_translations = combinatorics.permutation_w_rep([-1, 0, 1], 3)
    l_translations.remove([0, 0, 0])

    d_coordinates, d_290, l_coordinates, cryst1 = parse_pdb(pdb)

    ## chain IDs for the generated copies: B-Z, b-z, 0-9
    l_chains = list(
        "BCDEFGHIJKLMNOPQRSTUVWXYZ"
        "bcdefghijklmnopqrstuvwxyz"
        "0123456789"
    )
    chain_index = -1
    atom_no = 1001
    prev_residue = ["N/A"] * 3
    lines = []

    for operator in d_290.keys():
        ## NOTE(review): the operator matrix is looked up but never applied
        ## to the coordinates below -- confirm whether that is intended
        matrix = d_290[operator]
        for i, offset in enumerate(l_translations):
            translation = Numeric.array(offset) * cryst1
            print("%s %s %s" % (operator, i, chain_index))
            for chain in d_coordinates.keys():
                d_residues = d_coordinates[chain]
                for res_no in sorted(d_residues.keys()):

                    ## stop at the first atom that is close to the parent
                    d_atoms = d_residues[res_no]["atoms"]
                    for atom_name in d_atoms.keys():
                        coordinate2 = Numeric.array(list(d_atoms[atom_name]["coordinate"]))
                        coordinate2 += translation
                        vicinal = any(
                            sum((coordinate2 - coordinate1) ** 2) < 225
                            for coordinate1 in l_coordinates
                        )
                        if vicinal:
                            break

                    ## how far behind the current residue the last vicinal one is
                    after_1 = prev_residue == [i, res_no - 1]
                    if not (vicinal or after_1):
                        continue
                    after_2 = prev_residue == [i, res_no - 2]
                    after_3 = prev_residue == [i, res_no - 3]

                    ## new chain ID
                    if not (after_1 or after_2 or after_3):
                        print("%s %s %s" % (i, res_no, prev_residue))
                        chain_index += 1

                    ## add previous residue
                    if not (after_1 or after_2) and (res_no - 1) in d_residues:
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains, chain_index,
                            d_coordinates, pdb, chain, res_no - 1,
                        )

                    ## add current residue
                    if not after_1:
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains, chain_index,
                            d_coordinates, pdb, chain, res_no,
                        )

                    ## add next residue
                    if (res_no + 1) in d_residues:
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains, chain_index,
                            d_coordinates, pdb, chain, res_no + 1,
                        )

                    if vicinal:
                        prev_residue = [i, res_no]

    out = open("%s_contacts.pdb" % (pdb), "w")
    out.writelines(lines)
    out.close()

    ## total number of lines versus number of distinct lines
    print("%s %s" % (len(lines), len(set(lines))))
Example #4
0
def main():
    """Collect the crystal contacts of 2lzt and write them to a PDB file.

    Each residue is shifted by every non-zero offset (scaled by ``cryst1``)
    for every key of ``d_290``.  When a shifted atom comes within a squared
    distance of 225 of any entry of ``l_coordinates`` the residue is vicinal;
    vicinal residues and their sequence neighbours are passed to ``newline``.
    The resulting lines end up in ``2lzt_contacts.pdb``.
    """

    pdb = '2lzt'

    import Numeric
    import sys
    sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
    import combinatorics

    l_translations = combinatorics.permutation_w_rep([-1, 0, 1], 3)
    l_translations.remove([0, 0, 0])

    d_coordinates, d_290, l_coordinates, cryst1 = parse_pdb(pdb)

    def is_vicinal(coordinate):
        ## True as soon as one parent coordinate is closer than sqrt(225)
        for coordinate1 in l_coordinates:
            if sum((coordinate - coordinate1)**2) < 225:
                return True
        return False

    ## chain IDs available for the generated copies
    l_chains = (
        list('BCDEFGHIJKLMNOPQRSTUVWXYZ') +
        list('bcdefghijklmnopqrstuvwxyz') +
        list('0123456789')
    )
    chain_index = -1
    atom_no = 1001
    prev_residue = ['N/A', 'N/A', 'N/A']
    lines = []

    for operator in d_290.keys():
        ## NOTE(review): fetched but not used further down -- confirm
        matrix = d_290[operator]
        for i in range(len(l_translations)):
            translation = Numeric.array(l_translations[i]) * cryst1
            print('%s %s %s' % (operator, i, chain_index))
            for chain in d_coordinates.keys():
                d_chain = d_coordinates[chain]
                l_res_nos = sorted(d_chain.keys())
                for res_no in l_res_nos:

                    ## test atom by atom, stopping at the first close one
                    for atom_name in d_chain[res_no]['atoms'].keys():
                        d_atom = d_chain[res_no]['atoms'][atom_name]
                        coordinate2 = Numeric.array(list(d_atom['coordinate']))
                        coordinate2 += translation
                        vicinal = is_vicinal(coordinate2)
                        if vicinal:
                            break

                    if not vicinal and prev_residue != [i, res_no - 1]:
                        continue

                    ## new chain ID
                    if prev_residue not in [[i, res_no - k] for k in (3, 2, 1)]:
                        print('%s %s %s' % (i, res_no, prev_residue))
                        chain_index += 1

                    ## add previous residue
                    if (prev_residue not in [[i, res_no - k] for k in (2, 1)]
                            and res_no - 1 in d_chain):
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains,
                            chain_index, d_coordinates, pdb, chain,
                            res_no - 1)

                    ## add current residue
                    if prev_residue != [i, res_no - 1]:
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains,
                            chain_index, d_coordinates, pdb, chain,
                            res_no)

                    ## add next residue
                    if res_no + 1 in d_chain:
                        lines, atom_no = newline(
                            atom_no, translation, lines, l_chains,
                            chain_index, d_coordinates, pdb, chain,
                            res_no + 1)

                    if vicinal:
                        prev_residue = [i, res_no]

    fd_out = open('%s_contacts.pdb' % (pdb), 'w')
    fd_out.writelines(lines)
    fd_out.close()

    ## number of lines written and number of distinct lines among them
    print('%s %s' % (len(lines), len(set(lines))))
def add_probe_atoms(d_coords,d_dimensions,dist_min_sq,dist_max_sq,):

    dist_max = math.sqrt(dist_max_sq)
    dist_min = math.sqrt(dist_min_sq)

    bool_test = True
    bool_test = False

    l_atom_nos = d_coords.keys()
    l_atom_nos.sort()

##    ## avoid placing probe next to terminal mobile residues (should be by residue number)
##    l_atom_nos = l_atom_nos[20:-20]

    l_coords_probe = []
    l_pdb_coords_solvent = []

    x_min = int(d_dimensions['x']['min'])-shell
    x_max = int(d_dimensions['x']['max'])+shell+1
    y_min = int(d_dimensions['y']['min'])-shell
    y_max = int(d_dimensions['y']['max'])+shell+1
    z_min = int(d_dimensions['z']['min'])-shell
    z_max = int(d_dimensions['z']['max'])+shell+1


##    ##
##    ## faster method but doesnt calculate angle
##    ##
##    d_class = {}
##    for x in range(x_min,x_max,grid):
##        d_class[x] = {}
##        for y in range(y_min,y_max,grid):
##            d_class[x][y] = {}
##            for z in range(z_min,z_max,grid):
##                d_class[x][y][z] = None
##    for k,v in d_coords.items():
##
##        atom_name_protein = v['atom_name']
##        if atom_name_protein[0] == 'H':
##            continue
##
##        coord = v['coord']
##
##        x_min_local = int(coord[0]-coord[0]%grid+x_min%grid-dist_max-grid)
##        x_max_local = int(coord[0]-coord[0]%grid+x_min%grid+dist_max+grid)
##        y_min_local = int(coord[1]-coord[1]%grid+y_min%grid-dist_max-grid)
##        y_max_local = int(coord[1]-coord[1]%grid+y_min%grid+dist_max+grid)
##        z_min_local = int(coord[2]-coord[2]%grid+z_min%grid-dist_max-grid)
##        z_max_local = int(coord[2]-coord[2]%grid+z_min%grid+dist_max+grid)
##        x_max_local = min(x_max,x_max_local)
##        y_max_local = min(y_max,y_max_local)
##        z_max_local = min(z_max,z_max_local)
##        x_min_local = max(x_min,x_min_local)
##        y_min_local = max(y_min,y_min_local)
##        z_min_local = max(z_min,z_min_local)
##
##        for x in range(x_min_local,x_max_local,grid):
##            for y in range(y_min_local,y_max_local,grid):
##                for z in range(z_min_local,z_max_local,grid):
##                    ## if too close once, then always too close
##                    if d_class[x][y][z] == 'tooclose':
##                        continue
##                    coord_grid = numpy.array([x,y,z])
##                    dist_sq = sum((coord-coord_grid)**2)
##                    if dist_sq > dist_max_sq:
##                        d_class[x][y][z] = 'toofar'
####                        print k, 'a'
##                    elif dist_sq < dist_min_sq:
##                        d_class[x][y][z] = 'tooclose'
####                        print k, 'b'
##                    else:
##                        d_class[x][y][z] = 'extra'
##    l_coords = []
##    for x in range(x_min,x_max,grid):
##        for y in range(y_min,y_max,grid):
##            for z in range(z_min,z_max,grid):
##                if d_class[x][y][z] == 'extra':
##                    coord = numpy.array([x,y,z])
##                    l_coords += [coord]
##    print len(l_coords)
##    l_overlaps = [0]*len(l_coords)
##    write_pdb(l_overlaps,l_coords,'2exo','A',suffix='_test_distmin%s_distmax%s' %(dist_min,dist_max,))
##    stop


    d_class = {}
    for x in range(x_min,x_max,grid):
        print x, x_min, x_max
        d_class[x] = {}
        for y in range(y_min,y_max,grid):
            d_class[x][y] = {}
            for z in range(z_min,z_max,grid):
                d_class[x][y][z] = {}

                bool_vicinal = False
                bool_not_distant = False
##                bool_not_distant = 0

                l_coords_not_distant = []
                for atom_no_protein in l_atom_nos:

                    d = d_coords[atom_no_protein]
                    atom_name_protein = d['atom_name']
                    coord_protein = d['coord']

                    ## skip if hydrogen atoms
                    if atom_name_protein[0] == 'H':
                        continue

##                    if not atom_name_protein in l_atoms:
##                        continue

                    dist_sq = (
                        (x-coord_protein[0])**2
                        +
                        (y-coord_protein[1])**2
                        +
                        (z-coord_protein[2])**2
                        )
                    if dist_sq < dist_min_sq:
                        bool_vicinal = True
##                    if dist_sq < dist_max_sq:
##                    if atom_name_protein == 'CA' and dist_sq < dist_max_sq:
##                    if atom_name_protein in ['N','CA','CB','C','O',] and dist_sq < dist_max_sq:
                    if atom_name_protein in l_atoms and dist_sq < dist_max_sq:
                        bool_not_distant = True
                        ## only do angle between alpha carbon atoms
                        if atom_name_protein == 'CA':
                            l_coords_not_distant += [coord_protein]
##                        bool_not_distant += 1
                    if bool_vicinal == True:
                        break

                if bool_vicinal == False:
                    if bool_not_distant == True:

##                        if bool_test != True:
##
##                            l_coords_probe += [numpy.array([x,y,z,])]
##                            d_class[x][y][z]['class'] = 'possible_pocket'
##
##                        else:
                            
                            d_class[x][y][z]['class'] = 'not_pocket'
                            if len(l_coords_not_distant) > 1:
                                for i in range(len(l_coords_not_distant)-1):
                                    coord1 = l_coords_not_distant[i]
                                    v1 = coord1-numpy.array([x,y,z,])
                                    for j in range(i+1,len(l_coords_not_distant)):
                                        coord2 = l_coords_not_distant[j]
                                        v2 = coord2-numpy.array([x,y,z,])
                                        
                                        ## angle larger than xxx'
                                        if numpy.dot(v1,v2) < 0: ## angle > 90
                                            d_class[x][y][z]['class'] = 'possible_pocket'
                                            l_coords_probe += [[x,y,z,]]
                                            break
                                    if d_class[x][y][z]['class'] == 'possible_pocket':
                                        break
                                    
                    elif bool_not_distant == False:
                        d_class[x][y][z]['class'] = 'distant'
                elif bool_vicinal == True:
                    d_class[x][y][z]['class'] = 'protein'

    if bool_test == True:

        sys.path.append('/home/people/tc/svn/tc_sandbox/misc/')
        import combinatorics
        l_translations = combinatorics.permutation_w_rep([-1,0,1,],3)
    ##    l_translations = combinatorics.permutation_w_rep([-2,-1,0,1,2,],5)
        l_translations.remove([0,0,0,])

        l_overlaps = []

        d_class_grid = {}
        for x in range(int(d_dimensions['x']['min'])-shell,int(d_dimensions['x']['max'])+shell+1,grid):
            print x
            d_class_grid[x] = {}
            for y in range(int(d_dimensions['y']['min'])-shell,int(d_dimensions['y']['max'])+shell+1,grid):
                d_class_grid[x][y] = {}
                for z in range(int(d_dimensions['z']['min'])-shell,int(d_dimensions['z']['max'])+shell+1,grid):
                    count_pocket = 1 ## point itself is pocket
                    count_protein = 0
                    count_distant = 0

                    if d_class[x][y][z]['class'] != 'possible_pocket':
                        continue

                    for translation in l_translations:
                        try:
                            Class = d_class[x+translation[0]*grid][y+translation[1]*grid][z+translation[2]*grid]['class']
                            if Class == 'possible_pocket':
                                count_pocket += 1
                            elif Class == 'protein':
                                count_protein += 1
                            elif Class == 'distant':
                                count_distant += 1
                        except:
                            None

                    count_pocket_min = 16
                    count_protein_min = 18
                    count_distant_max = 27
                    if (count_pocket >= count_pocket_min or count_protein >= count_protein_min) and count_distant <= count_distant_max:
                        print count_distant
                        ## near pocket and semi buried in protein (not surface)
                        if   count_pocket >= count_pocket_min and count_protein >  count_protein_min/2.:
                            count_pocket = 13 ## green, include
                        ## not vicinal to many pocket grid points, but very buried in protein
                        elif count_protein >= count_protein_min:
                            count_pocket = 27 ## blue, include
                        ## not near other pocket points
                        elif count_pocket <= count_pocket_min/2.:
                            count_pocket = 24 ## light blue, exclude
    ##                    ## surface of protein
    ##                    elif count_protein <= count_protein_min/2.:
    ##                        count_pocket = 6 ## orange, exclude
                        ## not buried = in a large pocket = possibly ligand binding site
                        else:
                            count_pocket = 21 ## cyan
                    else:
    ##                    print count_pocket+count_protein+count_distant, count_pocket, count_protein, count_distant
                        count_pocket = 0 ## red, exclude
                        print x,y,z

    ##                count_pocket_min = 25
    ##                count_protein_min = 70
    ##                print count_protein, count_pocket
    ##                if count_pocket >= count_pocket_min or count_protein >= count_protein_min:
    ##                    ## not vicinal to many pocket grid points, but very buried in protein
    ##                    if count_pocket < count_pocket_min and count_protein >= count_protein_min:
    ##                        count_pocket = 27 ## blue, include
    ##                    ## near pocket grid points, but surface of protein
    ##                    elif count_pocket >= count_pocket_min and count_protein <= count_protein_min/2.:
    ##                        count_pocket = 6 ## orange, exclude
    ##                    ## near pocket and semi buried in protein (not surface)
    ##                    elif count_pocket >= count_pocket_min and count_protein > count_protein_min/2.:
    ##                        count_pocket = 13 ## green, include
    ##                    else:
    ##                        stopstop
    ##                else:
    ##                    count_pocket = 0 ## red, exclude
                    l_overlaps += [count_pocket/27.]

    ##            d_class_grid[x][y][z] = count_pocket

    ##                if count > 0:
    ##                    print x,y,z,d_class_grid[x][y][z]
    ##                    stop
    ##    print d_class_grid[-10][-1]
    ##    stop

        print 'first coord', l_coords_probe[0]
        print 'last coord', l_coords_probe[-1]
        print 'coords', len(l_coords_probe)
        print 'bfactors', len(l_overlaps)
        if len(l_coords_probe) != len(l_overlaps):
            print len(l_coords_probe)
            print len(l_overlaps)
            stop

    return l_coords_probe