def _frame_loader(yaml_file, prt, key_list, indices, save_trj, fname=None): traj_list=[] #if not an iterator make it if type(indices)!=list and indices.shape==(2,): indices = [indices] for ind in indices: traj_index, frame_index = ind traj_name = key_list[traj_index] traj_list.append(load_frame(yaml_file["base_dir"], prt.name, yaml_file["protein_dir"], traj_name, frame_index)) trj = traj_list[0] + traj_list[1:] if save_trj and fname is not None: with enter_protein_mdl_dir(yaml_file, prt.name): trj.save_xtc("%s"%fname) if not os.path.isfile("prot.pdb"): trj[0].save_pdb("prot.pdb") return trj
def _frame_loader(yaml_file, prt, key_list, indices, save_trj, fname=None): traj_list = [] #if not an iterator make it if type(indices) != list and indices.shape == (2, ): indices = [indices] for ind in indices: traj_index, frame_index = ind traj_name = key_list[traj_index] traj_list.append( load_frame(yaml_file["base_dir"], prt.name, yaml_file["protein_dir"], traj_name, frame_index)) trj = traj_list[0] + traj_list[1:] if save_trj and fname is not None: with enter_protein_mdl_dir(yaml_file, prt.name): trj.save_xtc("%s" % fname) if not os.path.isfile("prot.pdb"): trj[0].save_pdb("prot.pdb") return trj
def _get_common_features(yaml_file, featurizer, aligned_dict,save_df=True): """ Function to get the common features across protein using the common residues. can optionally save the pandas data to the mdl_dir :param yaml_file: The protein yaml_file :param featurizer: featurizer object used. :param prt_mapping: Mapping of each residue to its sequence :param aligned_dict : Dictionary of alignments for each protein :return: """ result_dict = {} df_dict={} for protein in yaml_file["protein_list"]: print(protein) #reset the featurizer featurizer = clone(featurizer) trj = load_random_traj(yaml_file, protein) df = pd.DataFrame(featurizer.describe_features(trj)) prt_mapping, prt_seq = _map_residue_ind_seq_ind(yaml_file, protein, aligned_dict[protein]) feature_vec =[] #for every feature for i in df.iterrows(): #get the index and the feature itself feature_ind, feature_dict = i all_res_in_algn = [] mapped_index_list=[] for aa_ind in feature_dict["resids"]: aa_code = prt_seq[aa_ind] #make sure we have the same residue assert(trj.top.residue(aa_ind).code==aa_code) #get the mapping for that aa to the main alignment mapped_index = prt_mapping[aa_ind] #for every protein in the alignment, check if we have the same residue #at the same position all_res_in_algn.append(np.alltrue([aligned_dict[prt][mapped_index]==aa_code for prt in yaml_file["protein_list"]])) mapped_index_list.append(mapped_index) #to account for additions and deletions, we check if the difference between #the mapping and the actual residue codes is the same. mapped_index_difference = [x - mapped_index_list[i - 1] for i, x in enumerate(mapped_index_list) if i > 0] resid_index_difference = [x - feature_dict["resids"][i - 1] for i, x in enumerate(feature_dict["resids"]) if i > 0] if not np.all(mapped_index_difference==resid_index_difference): all_res_in_algn.append(False) if np.alltrue(all_res_in_algn): feature_vec.append(feature_ind) df_dict[protein] = df.iloc[feature_vec] result_dict[protein] = feature_vec if save_df: new_df = df.iloc[feature_vec] with enter_protein_mdl_dir(yaml_file, protein): verbosedump(new_df, os.path.join("feature_descriptor.h5")) with enter_protein_data_dir(yaml_file, protein): verbosedump(new_df, os.path.join("sliced_feature_dir", "feature_descriptor.h5")) return result_dict, df_dict