def dist_angle_calculator(u_frag,
                          u_overlap,
                          v_frag,
                          v_overlap,
                          u_connections=1,
                          v_connections=1,
                          verbose=True):
    """
    Builds the pairwise distance and angle matrices between the exits of two fragments.

    :param u_frag, u_overlap: inputs handed to read_xyz for the first fragment and its overlap region;
                              the positions of the first item read_xyz returns are used
    :param v_frag, v_overlap: same as above, for the second fragment
    :param u_connections, v_connections: forwarded to coordinate_finder for the respective fragment
    :param verbose: forwarded to coordinate_finder
    :return: distances, angles - two arrays with one row per u exit and one column per v exit, filled with
             the values returned by distance_angle for that pair
    """
    # Load the coordinates in the same order as the arguments are given
    u_frag_xyz = read_xyz(u_frag)[0].get_positions()
    u_overlap_xyz = read_xyz(u_overlap)[0].get_positions()
    v_frag_xyz = read_xyz(v_frag)[0].get_positions()
    v_overlap_xyz = read_xyz(v_overlap)[0].get_positions()

    u_exits, u_linkeds = coordinate_finder(u_frag_xyz, u_overlap_xyz,
                                           u_connections, verbose)
    v_exits, v_linkeds = coordinate_finder(v_frag_xyz, v_overlap_xyz,
                                           v_connections, verbose)

    # One matrix entry per (u exit, v exit) pair
    grid_shape = (u_exits.shape[0], v_exits.shape[0])
    distances = np.zeros(grid_shape)
    angles = np.zeros(grid_shape)

    for row, u_exit in enumerate(u_exits):
        u_linked = u_linkeds[row, :]
        for col, v_exit in enumerate(v_exits):
            pair_distance, pair_angle = distance_angle(u_exit, u_linked,
                                                       v_exit,
                                                       v_linkeds[col, :])
            distances[row, col] = pair_distance
            angles[row, col] = pair_angle

    return distances, angles
def return_overlap(x1, x2, radius=0.8, n_overlap=3, make_plot=False, verbose=False):
    """
    Calculates the Euclidean distance matrix between 2 fragments, and reports whether or not the fragments overlap
    in space, as determined by a threshold on the Euclidean distance.

    :param x1, x2: .xyz files of the fragment atom coordinates
    :param radius: threshold for determining atom overlap (in angstroms)
    :param n_overlap: minimum number of (x1 atom, x2 atom) pairs within radius for the fragments to count as
                      overlapping
    :param make_plot: set to True to save a plot of the euclidean distance matrix to 'euc_dist.png'
                      (the figure is closed again once it has been saved)
    :param verbose: set to True to print whether an overlap was found and, if so, the number of overlapping pairs
    :return: overlap, u_atoms, v_atoms, u_overlaps, v_overlaps
             - overlap is True when at least n_overlap pairs lie within radius, False otherwise
             - when overlap is True, u_atoms / v_atoms have had the overlapping atoms deleted and
               u_overlaps / v_overlaps hold the atoms selected by the overlapping indices
             - when overlap is False, u_atoms / v_atoms are returned as read and u_overlaps / v_overlaps are None
    """

    # read 2 xyz files
    u_atoms = read_xyz(x1)[0]
    v_atoms = read_xyz(x2)[0]

    # loop over atom xyz vectors, calculate euclidean distance
    dist_mat = np.empty((len(u_atoms), len(v_atoms)))
    v_positions = v_atoms.get_positions()  # fetched once instead of once per u atom
    for i, u in enumerate(u_atoms.get_positions()):
        for j, v in enumerate(v_positions):
            dist_mat[i, j] = euc_dist(u, v)

    if make_plot:
        # Draw into the figure created here (not a hard-coded figure 1), so that repeated calls do not pile
        # axes and colorbars onto an old figure, and close it afterwards so figures do not accumulate.
        fig = plt.figure(figsize=(7, 7))
        plt.matshow(dist_mat, fignum=fig.number)
        plt.colorbar(fraction=0.046, pad=0.04)
        plt.title('Euclidean Distance Matrix')
        plt.savefig('euc_dist.png')
        plt.close(fig)

    # apply distance threshold once; every True entry is one (u atom, v atom) pair within radius
    within_radius = np.less_equal(dist_mat, radius)
    num_overlap_atoms = np.count_nonzero(within_radius)

    # row indices refer to u_atoms, column indices to v_atoms
    u_indices, v_indices = np.nonzero(within_radius)

    if num_overlap_atoms >= n_overlap:  # choose n_overlap=4 for planar overlap
        u_overlaps = u_atoms[u_indices]
        v_overlaps = v_atoms[v_indices]

        # remove the overlapping atoms from the fragments that are handed back
        del u_atoms[u_indices]
        del v_atoms[v_indices]

        if verbose:
            print('Overlap found! Number of overlapping atoms: {}'.format(num_overlap_atoms))
        return True, u_atoms, v_atoms, u_overlaps, v_overlaps

    if verbose:
        print('No overlap found :(')
    return False, u_atoms, v_atoms, None, None
Exemple #3
0
def initialise_system(args):
    """
    Reads in a .csv file and generates a population of RDKit molecules, as well as reading in target ligand coordinates

    :param args: system arguments parsed into main - should contain args.csv (a .csv with a 'SMILES' column),
                 args.tgt and args.tgt2 (args.tgt2 may be None)

    :return: population, tgt_atoms, tgt_species
     or population, tgt_atoms, tgt_species, tgt_atoms2 when args.tgt2 is given; in that case tgt_species is the
     de-duplicated union of the species of both targets (its order is not defined)
    """
    smiles_table = pd.read_csv(args.csv, header=0)
    # iterate the SMILES column directly; the other columns and the row index are not needed
    population = [Chem.MolFromSmiles(smiles) for smiles in smiles_table['SMILES']]

    tgt_atoms, _, _, tgt_species = read_xyz(args.tgt)
    if args.tgt2 is None:
        return population, tgt_atoms, tgt_species

    tgt_atoms2, _, _, tgt_species2 = read_xyz(args.tgt2)
    tgt_species = list(set().union(tgt_species, tgt_species2))  # creates a single tgt_species list
    return population, tgt_atoms, tgt_species, tgt_atoms2
Exemple #4
0
def main(args):
    """
    Generates SOAP descriptors for the structures read from args.xyz and saves them to args.tgt

    :param args: should contain args.xyz (input handed to read_xyz), args.rcut and args.sigma (SOAP settings)
                 and args.tgt (target handed to np.save)
    :return: None
    """
    # only the structures and the species list are needed from read_xyz
    mols, _, _, species = read_xyz(args.xyz)

    soap_settings = dict(species=species,
                         periodic=False,
                         rcut=args.rcut,
                         nmax=8,
                         lmax=6,
                         sigma=args.sigma,
                         sparse=True)
    descriptors = SOAP(**soap_settings).create(mols)

    # passed through normalize before saving
    descriptors = normalize(descriptors, copy=False)

    np.save(args.tgt, [descriptors])
Exemple #5
0
def main(args):
    """
    Generates SOAP descriptors for a dataset across MPI processes and optionally saves the per-molecule
    descriptors and/or the REMatch kernel computed from them.

    The structures are read from 'data/<task>.xyz', or from 'data/IC50/<subtask>.xyz' when args.task is 'IC50'.
    Each MPI process handles the slice of molecules given by return_borders.

    :param args: should contain args.task, args.subtask (only read when args.task is 'IC50'),
                 args.save_soap and args.soap_path, args.save_kernel and args.kernel_path
    :return: None
    """
    # data_name labels the log message and the kernel file: the subtask for IC50, otherwise the task
    # (the same prefix that is used for the per-molecule SOAP files below)
    if args.task != 'IC50':
        data_name = args.task
        xyz_path = 'data/' + args.task + '.xyz'
    else:
        data_name = args.subtask
        xyz_path = 'data/' + args.task + '/' + args.subtask + '.xyz'

    mols, num_list, _, species = read_xyz(xyz_path)

    dat_size = len(mols)

    mpi_comm = MPI.COMM_WORLD
    mpi_rank = mpi_comm.Get_rank()
    mpi_size = mpi_comm.Get_size()

    if mpi_rank == 0:
        print("\nEvaluating " + data_name + " rematch on " + str(mpi_size) +
              " MPI processes.\n")
        print('No. of molecules = {}\n'.format(dat_size))
        print('Elements present = {}\n'.format(species))

    # Setting up the SOAP descriptor
    rcut_small = 3.0
    sigma_small = 0.2
    rcut_large = 6.0
    sigma_large = 0.4

    small_soap = SOAP(species=species,
                      periodic=False,
                      rcut=rcut_small,
                      nmax=12,
                      lmax=8,
                      sigma=sigma_small,
                      sparse=True)

    large_soap = SOAP(species=species,
                      periodic=False,
                      rcut=rcut_large,
                      nmax=12,
                      lmax=8,
                      sigma=sigma_large,
                      sparse=True)

    t0 = time.time()
    my_border_low, my_border_high = return_borders(
        mpi_rank, dat_size, mpi_size)  # split indices between MPI processes

    my_mols = mols[my_border_low:my_border_high]
    # generate atomic descriptors: small- and large-cutoff SOAP stacked side by side
    soap = scipy.sparse.hstack(
        [small_soap.create(my_mols),
         large_soap.create(my_mols)])

    t1 = time.time()
    if mpi_rank == 0:
        print("SOAP: {:.2f}s\n".format(t1 - t0))
        print(
            "rcut_small = {:.1f}, sigma_small = {:.1f}, rcut_large = {:.1f}, sigma_large = {:.1f}"
            .format(rcut_small, sigma_small, rcut_large, sigma_large))

    soap = normalize(soap, copy=False)
    # group atomic descriptors by molecule
    my_soap = split_by_lengths(soap, num_list[my_border_low:my_border_high])
    my_len = len(my_soap)

    t2 = time.time()
    if mpi_rank == 0:
        print("Normalise & Split Descriptors: {:.2f}s\n".format(t2 - t1))

    if args.save_soap:  # save to args.soap_path for use with gpr_onthefly.py
        for i, mat in enumerate(my_soap):
            # files are numbered by the molecule's index in the full dataset
            scipy.sparse.save_npz(
                args.soap_path + data_name + '_soap_' +
                str(i + my_border_low), mat)

    if args.save_kernel:  # save to args.kernel_path for use with gpr_soap.py
        rematch = REMatchKernel(metric="polynomial",
                                degree=3,
                                gamma=1,
                                coef0=0,
                                alpha=0.5,
                                threshold=1e-6,
                                normalize_kernel=True)

        # this process' rows of the kernel: its own molecules against the whole dataset
        K = np.zeros((my_len, dat_size), dtype=np.float32)
        # number of kernel entries each process contributes, collected on rank 0 for Gatherv
        sendcounts = np.array(mpi_comm.gather(my_len * dat_size, root=0))

        if mpi_rank == 0:
            K_full = np.empty((dat_size, dat_size), dtype=np.float32)
            print("K memory usage(bytes): {}".format(K.nbytes + K_full.nbytes))
        else:
            K_full = None

        # row-parallelised kernel computation
        for index in range(0, mpi_size):
            if index == mpi_rank:
                # block of this process' molecules against themselves
                K[:, my_border_low:my_border_high] += rematch.create(
                    my_soap).astype(np.float32)
                continue  # skip useless calculation

            # rebuild the descriptors of the molecules owned by process `index`
            start, end = return_borders(index, dat_size, mpi_size)
            ref_mols = mols[start:end]
            ref_soap = scipy.sparse.hstack(
                [small_soap.create(ref_mols),
                 large_soap.create(ref_mols)])
            ref_soap = normalize(ref_soap, copy=False)
            ref_soap = split_by_lengths(ref_soap, num_list[start:end])
            K[:, start:end] += rematch.create(my_soap,
                                              ref_soap).astype(np.float32)

        # Gather kernel rows
        mpi_comm.Gatherv(sendbuf=K, recvbuf=(K_full, sendcounts), root=0)

        K = K_full

        if mpi_rank == 0:
            t3 = time.time()
            print("Normalised Kernel: {:.2f}s\n".format(t3 - t2))

            np.save(args.kernel_path + data_name + '_soap', K)
            print(K)

    mpi_comm.Barrier()
    MPI.Finalize()