예제 #1
0
def _frame_loader(yaml_file, prt, key_list, indices, save_trj, fname=None):
    traj_list=[]
    #if not an iterator make it
    if type(indices)!=list and indices.shape==(2,):
        indices = [indices]
    for ind in indices:
        traj_index, frame_index = ind
        traj_name = key_list[traj_index]
        traj_list.append(load_frame(yaml_file["base_dir"],
                                    prt.name, yaml_file["protein_dir"],
                                    traj_name, frame_index))

    trj = traj_list[0] + traj_list[1:]
    if save_trj and fname is not None:
        with enter_protein_mdl_dir(yaml_file, prt.name):
            trj.save_xtc("%s"%fname)
            if not os.path.isfile("prot.pdb"):
                trj[0].save_pdb("prot.pdb")

    return trj
예제 #2
0
def _frame_loader(yaml_file, prt, key_list, indices, save_trj, fname=None):
    traj_list = []
    #if not an iterator make it
    if type(indices) != list and indices.shape == (2, ):
        indices = [indices]
    for ind in indices:
        traj_index, frame_index = ind
        traj_name = key_list[traj_index]
        traj_list.append(
            load_frame(yaml_file["base_dir"], prt.name,
                       yaml_file["protein_dir"], traj_name, frame_index))

    trj = traj_list[0] + traj_list[1:]
    if save_trj and fname is not None:
        with enter_protein_mdl_dir(yaml_file, prt.name):
            trj.save_xtc("%s" % fname)
            if not os.path.isfile("prot.pdb"):
                trj[0].save_pdb("prot.pdb")

    return trj
예제 #3
0
def _get_common_features(yaml_file, featurizer, aligned_dict,save_df=True):
    """
    Function to get the common features across protein using the common residues.
    can optionally save the pandas data to the mdl_dir
    :param yaml_file: The protein yaml_file
    :param featurizer: featurizer object used.
    :param prt_mapping: Mapping of each residue to its sequence
    :param aligned_dict : Dictionary of alignments for each protein
    :return:
    """
    result_dict = {}
    df_dict={}
    for protein in yaml_file["protein_list"]:
        print(protein)
        #reset the featurizer
        featurizer = clone(featurizer)
        trj = load_random_traj(yaml_file, protein)
        df = pd.DataFrame(featurizer.describe_features(trj))
        prt_mapping, prt_seq = _map_residue_ind_seq_ind(yaml_file, protein,
                                                        aligned_dict[protein])
        feature_vec =[]
        #for every feature
        for i in df.iterrows():
            #get the index and the feature itself
            feature_ind, feature_dict = i
            all_res_in_algn = []
            mapped_index_list=[]
            for aa_ind in feature_dict["resids"]:
                aa_code = prt_seq[aa_ind]
                #make sure we have the same residue
                assert(trj.top.residue(aa_ind).code==aa_code)
                #get the mapping for that aa to the main alignment
                mapped_index = prt_mapping[aa_ind]
                #for every protein in the alignment, check if we have the same residue
                #at the same position
                all_res_in_algn.append(np.alltrue([aligned_dict[prt][mapped_index]==aa_code
                                          for prt in yaml_file["protein_list"]]))
                mapped_index_list.append(mapped_index)


            #to account for additions and deletions, we check if the difference between
            #the mapping and the actual residue codes is the same.
            mapped_index_difference = [x - mapped_index_list[i - 1]
                                       for i, x in enumerate(mapped_index_list) if i > 0]
            resid_index_difference = [x - feature_dict["resids"][i - 1]
                                       for i, x in enumerate(feature_dict["resids"]) if i > 0]
            if not np.all(mapped_index_difference==resid_index_difference):
                all_res_in_algn.append(False)


            if np.alltrue(all_res_in_algn):
                feature_vec.append(feature_ind)

        df_dict[protein] = df.iloc[feature_vec]
        result_dict[protein] = feature_vec

        if save_df:
            new_df = df.iloc[feature_vec]
            with enter_protein_mdl_dir(yaml_file, protein):
                verbosedump(new_df, os.path.join("feature_descriptor.h5"))
            with enter_protein_data_dir(yaml_file, protein):
                verbosedump(new_df, os.path.join("sliced_feature_dir",
                                                 "feature_descriptor.h5"))
    return result_dict, df_dict
예제 #4
0
def _get_common_features(yaml_file, featurizer, aligned_dict,save_df=True):
    """
    Function to get the common features across protein using the common residues.
    can optionally save the pandas data to the mdl_dir
    :param yaml_file: The protein yaml_file
    :param featurizer: featurizer object used.
    :param prt_mapping: Mapping of each residue to its sequence
    :param aligned_dict : Dictionary of alignments for each protein
    :return:
    """
    result_dict = {}
    df_dict={}
    for protein in yaml_file["protein_list"]:
        print(protein)
        #reset the featurizer
        featurizer = clone(featurizer)
        trj = load_random_traj(yaml_file, protein)
        df = pd.DataFrame(featurizer.describe_features(trj))
        prt_mapping, prt_seq = _map_residue_ind_seq_ind(yaml_file, protein,
                                                        aligned_dict[protein])
        feature_vec =[]
        #for every feature
        for i in df.iterrows():
            #get the index and the feature itself
            feature_ind, feature_dict = i
            all_res_in_algn = []
            mapped_index_list=[]
            for aa_ind in feature_dict["resids"]:
                aa_code = prt_seq[aa_ind]
                #make sure we have the same residue
                assert(trj.top.residue(aa_ind).code==aa_code)
                #get the mapping for that aa to the main alignment
                mapped_index = prt_mapping[aa_ind]
                #for every protein in the alignment, check if we have the same residue
                #at the same position
                all_res_in_algn.append(np.alltrue([aligned_dict[prt][mapped_index]==aa_code
                                          for prt in yaml_file["protein_list"]]))
                mapped_index_list.append(mapped_index)


            #to account for additions and deletions, we check if the difference between
            #the mapping and the actual residue codes is the same.
            mapped_index_difference = [x - mapped_index_list[i - 1]
                                       for i, x in enumerate(mapped_index_list) if i > 0]
            resid_index_difference = [x - feature_dict["resids"][i - 1]
                                       for i, x in enumerate(feature_dict["resids"]) if i > 0]
            if not np.all(mapped_index_difference==resid_index_difference):
                all_res_in_algn.append(False)


            if np.alltrue(all_res_in_algn):
                feature_vec.append(feature_ind)

        df_dict[protein] = df.iloc[feature_vec]
        result_dict[protein] = feature_vec

        if save_df:
            new_df = df.iloc[feature_vec]
            with enter_protein_mdl_dir(yaml_file, protein):
                verbosedump(new_df, os.path.join("feature_descriptor.h5"))
            with enter_protein_data_dir(yaml_file, protein):
                verbosedump(new_df, os.path.join("sliced_feature_dir",
                                                 "feature_descriptor.h5"))
    return result_dict, df_dict