예제 #1
0
def mk_table_seq(traj_file, topo_file):
    """Extract Protein Block (PB) sequences from a trajectory into a table.

    Every chain yielded by ``pbx.chains_from_trajectory`` (pbxplore library,
    https://pbxplore.readthedocs.io/en/latest/) is turned into a PB sequence
    with ``pbx.assign``; the sequences are stacked as the rows of a
    DataFrame.

    Parameters
    ----------
    traj_file : str
        Path to a trajectory file.
    topo_file : str
        Path to a topology file.

    Returns
    -------
    pandas.DataFrame
        One row per extracted PB sequence, labelled 1, 2, ... in the order
        the chains are yielded. Columns are labelled from 2 upwards because
        the first two PBs of every sequence are dropped (as are the last
        two). The table is empty when no chain is yielded.
    """
    # One single-column frame per chain. The column label is the 1-based
    # chain counter, which becomes the row label after the final transpose.
    per_chain_frames = []
    for i, (_chain_name, chain) in enumerate(
            pbx.chains_from_trajectory(traj_file, topo_file), start=1):
        # Dihedral angles are what pbxplore uses to assign a PB per position.
        dihedrals = chain.get_phi_psi_angles()
        pb_seq = pbx.assign(dihedrals)
        # Per the original author's note, pbxplore needs a window of 5
        # positions to assign a PB, so the first two and last two PBs of a
        # sequence are the undetermined "Z" and are removed here.
        per_chain_frames.append(
            pd.DataFrame(list(pb_seq)[2:-2], columns=[i]))

    # Concatenate once: growing the frame inside the loop re-copies all the
    # accumulated data at every iteration. ``pd.concat`` rejects an empty
    # list, so the no-chain case falls back to an empty frame.
    if per_chain_frames:
        table_seq = pd.concat(per_chain_frames, axis=1)
    else:
        table_seq = pd.DataFrame()

    # Sequences were collected as columns; transpose to get one per row.
    table_seq = table_seq.transpose()
    table_seq.columns = list(range(2, len(table_seq.columns) + 2))

    return table_seq
예제 #2
0
def pbassign_cli():
    """
    Command-line entry point for PB assignment.

    Options come from ``user_inputs()``. Chains are loaded either from the
    listed PDB files (when ``options.p`` is set) or from a trajectory
    (``options.x`` with ``options.g``). A PB sequence is assigned to every
    chain and all sequences are written to ``<options.o>.PB.fasta``.
    """
    options, pdb_name_lst = user_inputs()

    if not options.p:
        # Trajectory input
        chains = pbx.chains_from_trajectory(options.x, options.g)
    elif not pdb_name_lst:
        # PDB mode was requested but there is no file to work on
        print("Nothing to do. Good bye.")
        return
    else:
        # PDB structures input
        print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        chains = pbx.chains_from_files(pdb_name_lst)

    # Assign one PB sequence per chain, keeping each comment with it
    assigned = [
        (comment, pbx.assign(chain.get_phi_psi_angles()))
        for comment, chain in chains
    ]
    all_comments = [comment for comment, _ in assigned]
    all_sequences = [sequence for _, sequence in assigned]

    fasta_name = options.o + ".PB.fasta"
    with open(fasta_name, "w") as outfile:
        pbx.io.write_fasta(outfile, all_sequences, all_comments)

    print("wrote {0}".format(fasta_name))
예제 #3
0
 def __init__(self, args):
     """Assign a PB sequence to every chain of a trajectory.

     The sequences are stored, in loading order, in ``self.md_sa_seq``.
     When ``args.f`` is set, each sequence is also written to that file as
     a ``>chain_name`` header line followed by the sequence line.

     Parameters:

         args: object exposing ``input_trajectory`` and ``input_topology``
             (passed to ``pbx.chains_from_trajectory``) and ``f`` (output
             file path, or a falsy value to skip the file output).
     """
     self.md_sa_seq = []
     file_output_fasta = open(args.f, "w") if args.f else None
     try:
         for chain_name, chain in pbx.chains_from_trajectory(
                 args.input_trajectory, args.input_topology):
             dihedrals = chain.get_phi_psi_angles()
             pb_seq = pbx.assign(dihedrals)
             self.md_sa_seq.append(pb_seq)
             if file_output_fasta is not None:
                 file_output_fasta.write(">{}\n".format(chain_name))
                 file_output_fasta.write("{}\n".format(pb_seq))
     finally:
         # Close the output file even when loading or assignment fails
         # part-way through, so the handle is never leaked.
         if file_output_fasta is not None:
             file_output_fasta.close()
예제 #4
0
    def test_loader_xtc(self):
        """
        Check the API trajectory loader on the xtc test data.

        The first and the last loaded items must carry the expected
        comment (frame 0 and frame 9) and the expected chain description.
        """
        topology_path = os.path.join(here, "test_data/barstar_md_traj.gro")
        trajectory_path = os.path.join(here, "test_data/barstar_md_traj.xtc")
        loaded = list(pbx.chains_from_trajectory(trajectory_path,
                                                 topology_path))
        expected_chain = "Chain  / model : 355 atoms"

        # (position in the loaded list, comment template expected there)
        expectations = (
            (0, "{0} | frame 0"),
            (-1, "{0} | frame 9"),
        )
        for position, comment_template in expectations:
            comment, chain = loaded[position]
            assert comment_template.format(trajectory_path) == comment
            assert expected_chain == format(chain)
예제 #5
0
    def test_loader_xtc(self):
        """
        Check the API trajectory loader on the xtc test data.

        Both ends of the loaded list are verified: the comment must name
        the trajectory and the frame (0 for the first item, 9 for the
        last), and the chain must format to the reference description.
        """
        gro_path = os.path.join(here, "test_data/barstar_md_traj.gro")
        xtc_path = os.path.join(here, "test_data/barstar_md_traj.xtc")
        frames = list(pbx.chains_from_trajectory(xtc_path, gro_path))
        chain_description = "Chain  / model : 355 atoms"

        def check(item, comment_template):
            # An item is a (comment, chain) pair as yielded by the loader.
            found_comment, found_chain = item
            self.assertEqual(comment_template.format(xtc_path),
                             found_comment)
            self.assertEqual(chain_description, format(found_chain))

        check(frames[0], "{0} | frame 0")
        check(frames[-1], "{0} | frame 9")
예제 #6
0
    def pbassign_func(self):
        """
        Assign a PB sequence to every chain of the selected trajectory.

        The trajectory and topology paths are read from
        ``self.trajectory_file`` and ``self.topology_file``. The resulting
        sequences are saved through ``save_it.save_fasta()`` under the
        trajectory's base name and then handed to
        ``self.count_occurrence()``. Progress messages are appended to
        ``self.txt_log``.

        :return: None. The outputs are produced by the two calls above.
        """

        # Input paths chosen by the user
        trajectory = self.trajectory_file.get()
        topology = self.topology_file.get()

        # PB assignment: one (name, sequence) pair per loaded chain
        assigned = [
            (chain_name, pbx.assign(chain.get_phi_psi_angles()))
            for chain_name, chain in pbx.chains_from_trajectory(
                trajectory=trajectory, topology=topology)
        ]
        names = [chain_name for chain_name, _ in assigned]
        pb_sequences = [pb_seq for _, pb_seq in assigned]

        # Report progress in the log screen
        self.txt_log.insert('end', ">>> PB assignment is finished" + "\n\n")

        # Store all the PB sequences in a single fasta file
        save_it.save_fasta(
            input_seqs=pb_sequences,
            names=names,
            file_name=os.path.basename(trajectory),
        )
        # Report progress in the log screen
        self.txt_log.insert(
            'end',
            ">>> PB Sequences were saved into " "a Fasta file" + "\n\n",
        )

        # Compute the occurrence of each PB from the assigned sequences
        self.count_occurrence(seq=pb_sequences)
예제 #7
0
def pbassign_cli():
    """
    Command-line entry point for PB assignment.

    Options come from ``user_inputs()``. Chains are loaded from the listed
    PDB files when ``options.p`` is set, otherwise from a trajectory
    (``options.x`` with ``options.g``). Chains whose angle computation
    raises ``FloatingPointError`` are reported on stderr and skipped. The
    remaining sequences, if any, are written to ``<options.o>.PB.fasta``.
    """
    options, pdb_name_lst = user_inputs()

    if not options.p:
        # Trajectory input
        chains = pbx.chains_from_trajectory(options.x, options.g)
    elif not pdb_name_lst:
        # PDB mode was requested but there is no file to work on
        print('Nothing to do. Good bye.')
        return
    else:
        # PDB structures input
        print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        chains = pbx.chains_from_files(pdb_name_lst)

    all_comments = []
    all_sequences = []
    for comment, chain in chains:
        try:
            sequence = pbx.assign(chain.get_phi_psi_angles())
        except FloatingPointError:
            print(
                "The computation of angles produced NaN. This typically means there are issues"
                " with some residues coordinates. Check your input file ({0})".
                format(comment),
                file=sys.stderr)
            continue
        # Only chains that were assigned successfully are kept
        all_comments.append(comment)
        all_sequences.append(sequence)

    if not all_comments:
        print("No output file was written")
        return

    fasta_name = options.o + ".PB.fasta"
    with open(fasta_name, 'w') as outfile:
        pbx.io.write_fasta(outfile, all_sequences, all_comments)

    print("wrote {0}".format(fasta_name))
예제 #8
0
def cli(args=None):
    """Command-line entry point: PB assignment and mutual information.

    Chains are loaded from PDB files or from a trajectory, a PB sequence is
    assigned to each chain, and the mutual information matrix of the
    sequences is written to ``options.output``. When ``options.network`` is
    set, an interaction graph built from that matrix is also written there
    in GML format.

    Parameters
    ----------
    args : list of str, optional
        CLI arguments, args are used for testing (default is None, in
        which case ``sys.argv[1:]`` is taken).

    Returns
    -------
    None
        Writes a csv file and possibly a gml file.

    Raises
    ------
    SystemExit
        If no PB sequence could be assigned from the input.

    Notes
    -----
    Main function of the script, for use with CLI.

    """
    if not args:
        args = sys.argv[1:]
    # NOTE(review): ``args`` is never forwarded to ``user_inputs()``, so the
    # parameter currently has no effect -- confirm whether ``user_inputs``
    # is meant to receive it.

    options, pdb_name_lst = user_inputs()
    if options.pdb:
        if pdb_name_lst:
            print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        else:
            print('Nothing to do. Good bye.')
            return
        # PB assignment of PDB structures
        chains = pbx.chains_from_files(pdb_name_lst)
    else:
        # PB assignment of a trajectory
        chains = pbx.chains_from_trajectory(options.x, options.g)

    all_comments = []
    all_sequences = []

    for comment, chain in chains:
        try:
            dihedrals = chain.get_phi_psi_angles()
            sequence = pbx.assign(dihedrals)
            all_comments.append(comment)
            all_sequences.append(sequence)
        except FloatingPointError:
            # The chain is reported and skipped; the others are still used.
            log.error("The computation of angles produced NaN. "
                      "This typically means there are issues with "
                      "some residues coordinates. "
                      f"Check your input file ({comment})")

    # Without this guard, ``all_sequences[0]`` below fails with an
    # IndexError when no chain was loaded or every chain was skipped.
    if not all_sequences:
        log.error("No sequences could be assigned from the input.")
        sys.exit(1)

    log.info(f"There are {len(all_sequences)} sequences of length "
             f"{len(all_sequences[0])}")
    log.info("Calculating the Mutual Information matrix ...")
    MI_matrix = mutual_information_matrix(all_sequences)
    # Write to a file
    log.info(f"Writing the matrix as {options.output} ...")
    df = pd.DataFrame(MI_matrix)
    df.to_csv(options.output)

    # Optional network output.
    if options.network:
        log.info("Creating a network ...")
        PB_graph = interaction_graph(MI_matrix)
        log.info(f"Writing the network as {options.network} ...")
        # Write the graph to GML format.
        nx.write_gml(PB_graph, path=options.network)