def test_distance_array_parallel(self):
    # Compare the matrix returned by ``distance_array`` for the two coordinate
    # sets held in ``self.coord`` against the reference stored in ``self.ref``.
    first_coords, second_coords = self.coord[0], self.coord[1]
    computed = distance_array(first_coords, second_coords)
    assert_allclose(
        computed,
        self.ref,
        rtol=1e-6,
        atol=1e-6,
        err_msg="Cython parallel distance matrix does not match C",
    )
def pairwise_distances(cls, com_matrix):
    """Return the pair-wise distance matrix of the centre-of-mass coordinates.

    ``com_matrix`` is handed to ``distance_module.distance_array`` as both
    coordinate sets, so every row is compared with every row (itself included).
    """
    # Per the original note: call MDAnalysis.core.distances.distance_array here
    # instead of the MDAnalysis.core.parallel.distances variant, because the
    # latter requires its inputs to be of the Cython DTYPE_t type.
    pair_matrix = distance_module.distance_array(com_matrix, com_matrix)
    return pair_matrix
def test_distance_array_parallel_results(self):
    # Same comparison as the plain call, but with a caller-supplied ``result``
    # buffer passed through the ``result=`` keyword.
    #
    # The buffer must have one row per entry of the first coordinate set and
    # one column per entry of the second one; taking both dimensions from
    # ``self.coord[0]`` only works when the two sets happen to be equally long.
    n_rows = self.coord[0].shape[0]
    n_cols = self.coord[1].shape[0]
    # Allocate float32 directly rather than building a float64 array and
    # converting it afterwards.
    result = np.empty((n_rows, n_cols), dtype=np.float32)
    cython_parallel = distance_array(self.coord[0], self.coord[1], result=result)
    assert_allclose(
        cython_parallel,
        self.ref,
        rtol=1e-6,
        atol=1e-6,
        err_msg="Cython parallel distance matrix does not match C",
    )
def test_PBC2(self):
    # Regression check for Issue 151: the periodic distance returned by
    # ``distance_array`` for a single pair of points must agree with a direct
    # NumPy calculation that wraps each displacement component by the box.
    point_a = np.array([7.90146923, -13.72858524, 3.75326586], dtype=np.float32)
    point_b = np.array([-1.36250901, 13.45423985, -0.36317623], dtype=np.float32)
    cell = np.array([5.5457325, 5.5457325, 5.5457325], dtype=np.float32)

    def wrapped_distance(p, q, lengths):
        # Shift every component of the displacement by the nearest whole
        # multiple of the corresponding box entry, then take the norm.
        delta = p - q
        shift = np.rint(delta / lengths) * lengths
        return np.linalg.norm(delta - shift)

    expected = wrapped_distance(point_a, point_b, cell)
    distances = distance_array(np.array([point_a]), np.array([point_b]), cell)
    observed = distances[0, 0]
    assert_allclose(
        observed,
        expected,
        rtol=1e-6,
        atol=1e-6,
        err_msg="Issue 151 not correct (PBC in distance array)",
    )
def count_contacts(gro_file, trr_file, prot_seq, protein_residue_list, nprots,
                   lipid, bs_dict, file_label, nframes, stride=1,
                   lipid_part='headgroup', cutoff=6.5):
    """Count, per binding site, how many site chunks each lipid contacts.

    For every processed frame, every protein copy and every entry of
    ``bs_dict`` the distances between the binding-site coordinates and the
    selected lipid beads are computed.  Rows are grouped with the split points
    returned by ``make_split_list_single_prot`` (presumably one group per
    residue -- confirm against that helper) and columns are grouped into
    ``n_lipids`` equal sections (one per lipid).  A group/lipid pair counts as
    a contact when its minimum distance is ``<= cutoff``.

    Parameters
    ----------
    gro_file, trr_file
        Passed unchanged to ``MDAnalysis.Universe``.
    prot_seq
        Sequence of one protein copy; its length is the number of residues
        per copy.
    protein_residue_list
        Not used by this function; kept for interface compatibility.
    nprots
        Number of protein copies in ``universe.segments[0]``.
    lipid
        Lipid residue name; also a key into ``lipid_particles[lipid_part]``.
    bs_dict
        Maps a binding-site label to residue indices within one protein copy.
    file_label
        Results are written to ``n_contacts_<file_label>.txt``.
    nframes
        Maximum number of frames to process.
    stride
        Step used when slicing the trajectory.
    lipid_part
        Key into ``lipid_particles`` selecting which bead names to use.
    cutoff
        Contact distance threshold.

    Returns
    -------
    tuple of three dicts, all keyed by binding-site label:
        * list of per-lipid contact counts (only lipids with at least one
          contact), one entry per frame/protein combination that had contacts;
        * ``{protein_index: [mean count per frame]}`` (0 when no contact);
        * ``{protein_index: [std of counts per frame]}`` (0 when no contact).
    """
    universe = MDAnalysis.Universe(gro_file, trr_file)
    protein_res_total = len(prot_seq)
    # The sequence never changes, so convert it to an array only once.
    prot_seq_array = numpy.array(list(prot_seq))

    beads = lipid_particles[lipid_part][lipid]
    lipid_selection = 'resname {} and ({})'.format(
        lipid, ' or '.join('name {}'.format(bead) for bead in beads))
    lipids = universe.selectAtoms(lipid_selection)
    n_lipid_beads = len(beads)
    # Floor division keeps this an integer (numpy.split needs a section
    # count) regardless of the interpreter's division semantics.
    n_lipids = lipids.numberOfAtoms() // n_lipid_beads

    # initialise data storage
    n_contacts_dict = {}
    n_contacts_dict_pertime_avg = {}
    n_contacts_dict_pertime_stdev = {}
    for bs_annotation in bs_dict:
        n_contacts_dict[bs_annotation] = []
        n_contacts_dict_pertime_avg[bs_annotation] = {
            prot: [] for prot in range(nprots)}
        n_contacts_dict_pertime_stdev[bs_annotation] = {
            prot: [] for prot in range(nprots)}

    startTime = time.time()
    print('Here we go...')
    frame = 0
    for ts in universe.trajectory[:nframes+1:stride]:
        if frame >= nframes:
            print('Have reached maximum number of frames specified. Stopping...')
            # Nothing further would be processed, so leave the loop.
            break
        # Lipid coordinates are the same for every protein / binding site
        # within one frame.
        lipid_coords = lipids.coordinates()
        for i in range(nprots):
            for bs_annotation, bs_residues in bs_dict.items():
                # Shift the per-copy residue indices to protein copy ``i``.
                repeat_res_list = i*protein_res_total + numpy.array(bs_residues)
                single_prot_bs_coords = universe.segments[0][repeat_res_list].coordinates()
                all_dists = distance_array(single_prot_bs_coords, lipid_coords,
                                           ts.dimensions)
                prot_split_bs = make_split_list_single_prot(
                    prot_seq_array[bs_residues])
                # Minimum distance for every (lipid, row group) pair:
                # outer split = one column section per lipid,
                # inner split = row groups given by prot_split_bs.
                protein_lipid_dist_perresidue_all = numpy.array(
                    [[chunk.min()
                      for chunk in numpy.split(lip, prot_split_bs, axis=0)]
                     for lip in numpy.split(all_dists, n_lipids, axis=1)])
                bs_interactions = protein_lipid_dist_perresidue_all <= cutoff
                ncontacts_perlipid = numpy.sum(bs_interactions, axis=1)
                # Counts are non-negative, so "any lipid in contact" is the
                # same as "the total is positive".
                in_contact = ncontacts_perlipid[ncontacts_perlipid > 0]
                if in_contact.size > 0:
                    n_contacts_dict[bs_annotation].append(list(in_contact))
                    n_contacts_dict_pertime_avg[bs_annotation][i].append(
                        numpy.mean(in_contact))
                    n_contacts_dict_pertime_stdev[bs_annotation][i].append(
                        numpy.std(in_contact))
                else:
                    n_contacts_dict_pertime_avg[bs_annotation][i].append(0)
                    n_contacts_dict_pertime_stdev[bs_annotation][i].append(0)
        frame += 1
        if frame % 100 == 0:
            print('Frame {} took {:3f} s\r'.format(frame, time.time()-startTime))
        startTime = time.time()

    # ``with`` guarantees the file is closed even if a write fails.
    with open('n_contacts_{}.txt'.format(file_label), 'w') as f:
        f.write(str(n_contacts_dict)+'\n\n')
        f.write(str(n_contacts_dict_pertime_avg)+'\n\n')
        f.write(str(n_contacts_dict_pertime_stdev)+'\n\n')
    return n_contacts_dict, n_contacts_dict_pertime_avg, n_contacts_dict_pertime_stdev
def fetch_interactions(gro_file, trr_file, prot_seq, prot_name, nmonomers, lipid,
                       prot_index_list, nframes, stride=1,
                       lipid_part='headgroup', cutoff=65):
    """Collect per-residue minimum protein-lipid distances for each frame.

    For every processed frame and every protein in ``prot_index_list`` the
    distance matrix between the protein's atoms and the selected lipid beads
    is reduced twice: first to the minimum over each lipid's beads, then to
    the minimum over each row group defined by ``prot_split`` (built from
    ``make_split_list_single_prot(prot_seq)``, repeated for every monomer).

    Parameters
    ----------
    gro_file, trr_file
        Passed unchanged to ``MDAnalysis.Universe``.
    prot_seq
        Sequence of one monomer; its length is the residue count per monomer.
    prot_name
        Not used by this function; kept for interface compatibility.
    nmonomers
        Number of monomers that make up one protein.
    lipid
        Lipid residue name; also a key into ``lipid_particles[lipid_part]``.
    prot_index_list
        Indices of the proteins (within ``universe.segments[0]``) to analyse.
    nframes
        Maximum number of frames to process; also the length of the last
        axis of every returned array.
    stride
        Step used when slicing the trajectory.
    lipid_part
        Key into ``lipid_particles`` selecting which bead names to use.
    cutoff
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    (prot_lipid_dists, lipid_indices)
        ``prot_lipid_dists`` maps each protein index to an integer array of
        shape ``(len(prot_seq) * nmonomers, n_lipids, nframes)``;
        ``lipid_indices`` is ``lipids.residues.resids()``.
    """
    universe = MDAnalysis.Universe(gro_file, trr_file)
    protein_res_total = len(prot_seq)
    n_res_per_protein = protein_res_total * nmonomers

    prot_split_monomer = make_split_list_single_prot(prot_seq)
    n_prot_atoms_permonomer = len(
        universe.segments[0][numpy.arange(protein_res_total)].atoms)
    print('n_prot_atoms_permonomer {}'.format(n_prot_atoms_permonomer))

    # Repeat the single-monomer split points for every monomer, offset by the
    # monomer's first atom, and add a split at each monomer boundary.
    prot_split = []
    for m in range(nmonomers):
        prot_split += list(
            numpy.array(prot_split_monomer) + m * n_prot_atoms_permonomer)
        if (m + 1) < nmonomers:
            prot_split += [(m + 1) * n_prot_atoms_permonomer]

    beads = lipid_particles[lipid_part][lipid]
    lipid_selection = 'resname {} and ({})'.format(
        lipid, ' or '.join('name {}'.format(bead) for bead in beads))
    lipids = universe.selectAtoms(lipid_selection)
    n_lipid_beads = len(beads)
    # Floor division keeps this an integer: it is used as an array dimension
    # and as the section count for numpy.split.
    n_lipids = lipids.numberOfAtoms() // n_lipid_beads
    lipid_indices = lipids.residues.resids()

    startTime = time.time()
    print('Here we go...')
    frame = 0
    prot_lipid_dists = {}
    for protein_index in prot_index_list:
        # NOTE(review): integer storage truncates the fractional part of every
        # distance written below -- confirm this is intended.
        prot_lipid_dists[protein_index] = numpy.zeros(
            (n_res_per_protein, n_lipids, nframes), dtype=int)

    for ts in universe.trajectory[::stride]:
        if frame >= nframes:
            print('Have reached maximum number of frames specified. Stopping...')
            # Stop here instead of stepping through (and reporting on) every
            # remaining frame of the trajectory.
            break
        # Lipid coordinates are shared by all proteins within one frame.
        lipid_coords = lipids.coordinates()
        for protein_index in prot_index_list:
            # get min dist per lipid and residue
            single_prot = universe.segments[0][
                n_res_per_protein * protein_index
                + numpy.arange(n_res_per_protein)]
            dists = distance_array(single_prot.coordinates(), lipid_coords,
                                   ts.dimensions)
            # (n_lipids, n_protein_atoms): minimum over each lipid's beads.
            min_dists_per_lipid = numpy.min(
                numpy.array(numpy.split(dists, n_lipids, axis=1)), axis=2)
            # (n_groups, n_lipids): minimum over each row group.
            split_per_res = numpy.split(min_dists_per_lipid, prot_split, axis=1)
            min_dists_perresidue = numpy.array(
                [x.min(axis=1) for x in split_per_res])
            prot_lipid_dists[protein_index][:, :, frame] = min_dists_perresidue
        if frame == 0:
            print('Frame {} (fromtraj)or{} (hardcoded) took {:3f} s\r'.format(
                ts.frame, frame, time.time() - startTime))
        frame += 1
        startTime = time.time()
    return prot_lipid_dists, lipid_indices
def count_frequencies(gro_file, trr_file, prot_seq, protein_residue_list, nrepeats,
                      lipid, stride=1, lipid_part='headgroup',
                      protein_centre='centroid', protein_centre_cutoff=60,
                      cutoff=6.5):
    """Accumulate protein-lipid contact counts per residue over a trajectory.

    For each frame and each protein repeat, lipids whose representative bead
    (the first bead name listed for the lipid) lies within
    ``protein_centre_cutoff`` of the protein centre are picked out.  For those
    lipids the minimum distance between every row group of the protein
    (groups come from ``make_split_list_single_prot(prot_seq)``) and every
    lipid is computed, and each pair with a distance ``<= cutoff`` adds one to
    that group's running total.

    Parameters
    ----------
    gro_file, trr_file
        Passed unchanged to ``MDAnalysis.Universe``.
    prot_seq
        Sequence of one protein repeat; its length is the residue count.
    protein_residue_list
        Labels for the accumulated counts; used as the keys of the result.
    nrepeats
        Number of protein repeats in ``universe.segments[0]``.
    lipid
        Lipid residue name; also a key into ``lipid_particles[lipid_part]``.
    stride
        Step used when slicing the trajectory.
    lipid_part
        Key into ``lipid_particles`` selecting which bead names to use.
    protein_centre
        ``'centroid'`` or an integer residue number used as the protein
        position.
    protein_centre_cutoff
        Distance below which a lipid is considered close to the protein.
    cutoff
        Contact distance threshold.

    Returns
    -------
    dict or None
        ``protein_residue_list`` entries mapped to their accumulated contact
        counts, or ``None`` (after printing an error) when ``protein_centre``
        is neither ``'centroid'`` nor an integer.
    """
    universe = MDAnalysis.Universe(gro_file, trr_file)
    protein_res_total = len(prot_seq)
    # The result is not used below; the call is kept in case the helper
    # validates its arguments.
    protein_residue_dictionary = find_prot_residues(
        protein_res_total, protein_residue_list, nrepeats)
    prot_split = make_split_list_single_prot(prot_seq)

    beads = lipid_particles[lipid_part][lipid]
    lipid_selection = 'resname {} and ({})'.format(
        lipid, ' or '.join('name {}'.format(bead) for bead in beads))
    # Just one bead per lipid, used to decide which lipids are close.
    lipid_rep_selection = 'resname {} and name {}'.format(lipid, beads[0])
    lipids = universe.selectAtoms(lipid_selection)
    n_lipid_beads = len(beads)
    lipid_reps = universe.selectAtoms(lipid_rep_selection)

    # initialise protein-lipid interactions frequency list
    proteinres_lipid_interactions = numpy.array([0 for i in protein_residue_list])

    startTime = time.time()
    print('Here we go...')
    frame = 0
    for ts in universe.trajectory[::stride]:
        for i in range(nrepeats):
            single_prot = universe.segments[0][
                range(i*protein_res_total, (i+1)*protein_res_total)]
            # find protein centroid - or pick out residue to represent
            # protein position
            if protein_centre == 'centroid':
                single_prot_cent = numpy.array([single_prot.centroid()])
            elif isinstance(protein_centre, (int, numpy.integer)):
                # The previous test compared the value with the ``int`` type
                # object itself, which is never true for an actual residue
                # number, so this branch could not be reached.
                # pick out first atom of specified residue;
                # -1 is because res numbers are zero-indexed
                # NOTE(review): this yields an atom object, whereas the
                # centroid branch passes a (1, 3) coordinate array to
                # distance_array -- confirm the atom is accepted there.
                single_prot_cent = universe.segments[0][
                    i*protein_res_total + protein_centre-1][0]
            else:
                print('Error: protein_centre should either be an integer residue number, or "centroid"')
                return None

            # find lipids within the cutoff of the protein centre
            close = distance_array(single_prot_cent, lipid_reps.coordinates(),
                                   ts.dimensions) < protein_centre_cutoff
            # numpy.nonzero gives a tuple of arrays - the second holds the
            # indices of the lipids that are 'close' to the protein
            close_lipid_idx = numpy.nonzero(close)[1]
            n_lipids_close = len(close_lipid_idx)
            if n_lipids_close == 0:
                # Nothing is added to proteinres_lipid_interactions; checking
                # first avoids indexing the lipid group with an empty list.
                continue

            # convert lipid indices to atom indices for the 'close' lipids
            lipids_close_indices = []
            for index in close_lipid_idx:
                lipids_close_indices.extend(
                    range(index*n_lipid_beads, (index+1)*n_lipid_beads))
            lipids_close = lipids[lipids_close_indices]

            # now look at prot-lipid interaction on per residue level
            all_dists = distance_array(single_prot.coordinates(),
                                       lipids_close.coordinates(),
                                       ts.dimensions)
            protein_lipid_dist_perresidue_all = numpy.array(
                [[chunk.min()
                  for chunk in numpy.split(lip, prot_split, axis=0)]
                 for lip in numpy.split(all_dists, n_lipids_close, axis=1)])
            interactions = protein_lipid_dist_perresidue_all <= cutoff
            # sum over all lipids for each res
            proteinres_lipid_interactions += numpy.sum(interactions, axis=0)
        frame += 1
        print('Frame {} took {:3f} s'.format(frame, time.time()-startTime))
        startTime = time.time()

    proteinres_lipid_interactions_dict = dict(
        zip(protein_residue_list, proteinres_lipid_interactions))
    print(proteinres_lipid_interactions_dict)
    return proteinres_lipid_interactions_dict