def mk_table_seq(traj_file, topo_file):
    """Extract Protein Block (PB) sequences from a trajectory into a table.

    The sequences are assigned with the pbxplore library
    (https://pbxplore.readthedocs.io/en/latest/).

    Parameters:
        traj_file: string, path to a trajectory file.
        topo_file: string, path to a topology file.

    Output:
        table_seq: a pandas.DataFrame with one row per item yielded by
            ``pbx.chains_from_trajectory`` (row labels start at 1) and one
            column per retained position (column labels start at 2).
            The table is empty when nothing is yielded.
    """
    # One single-column DataFrame per frame; they are joined once after the
    # loop so the growing table is not re-copied on every iteration.
    frame_columns = []
    for frame_number, (_, chain) in enumerate(
            pbx.chains_from_trajectory(traj_file, topo_file), start=1):
        # Get the dihedral angles used to assign a PB to each position.
        dihedrals = chain.get_phi_psi_angles()
        pb_seq = pbx.assign(dihedrals)
        # Per the original author's note, pbxplore needs a window of 5
        # positions to assign a PB, so the first two and last two PBs of a
        # sequence are "Z" (undetermined); they are removed here.
        frame_columns.append(
            pd.DataFrame(list(pb_seq)[2:-2], columns=[frame_number]))

    if frame_columns:
        table_seq = pd.concat(frame_columns, axis=1)
    else:
        # pd.concat refuses an empty list; keep the empty-table result.
        table_seq = pd.DataFrame()

    # Sequences were stored as columns; transpose to put one sequence per row.
    table_seq = table_seq.transpose()
    # Column labels start at 2 because the first two positions were dropped.
    table_seq.columns = list(range(2, len(table_seq.columns) + 2))
    return table_seq
def pbassign_cli():
    """
    Command line entry point of PBassign.

    Reads the user options, assigns a PB sequence to every chain coming
    either from PDB files (``options.p``) or from a trajectory
    (``options.x`` / ``options.g``), and writes all sequences to
    ``<options.o>.PB.fasta``. Returns early, writing nothing, when PDB mode
    is selected but no PDB file was given.
    """
    options, pdb_name_lst = user_inputs()

    # Select the source of the chains.
    if not options.p:
        # PB assignment of a Gromacs trajectory
        chain_source = pbx.chains_from_trajectory(options.x, options.g)
    elif pdb_name_lst:
        print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        # PB assignment of PDB structures
        chain_source = pbx.chains_from_files(pdb_name_lst)
    else:
        print("Nothing to do. Good bye.")
        return

    headers = []
    pb_sequences = []
    for header, structure in chain_source:
        angles = structure.get_phi_psi_angles()
        pb_sequences.append(pbx.assign(angles))
        headers.append(header)

    output_path = options.o + ".PB.fasta"
    with open(output_path, "w") as handle:
        pbx.io.write_fasta(handle, pb_sequences, headers)
    print("wrote {0}".format(output_path))
def __init__(self, args):
    """Assign a PB sequence to every chain of a trajectory.

    The sequences are collected in ``self.md_sa_seq`` in the order the
    chains are yielded. When ``args.f`` is set, each sequence is also
    written to that file as a ``>name`` line followed by the sequence.

    Parameters:
        args: object providing ``input_trajectory``, ``input_topology``
            and ``f`` (output FASTA path, falsy to disable the output).
    """
    self.md_sa_seq = []
    # The output file is optional; it is opened before reading starts.
    fasta_file = open(args.f, "w") if args.f else None
    try:
        for chain_name, chain in pbx.chains_from_trajectory(
                args.input_trajectory, args.input_topology):
            dihedrals = chain.get_phi_psi_angles()
            pb_seq = pbx.assign(dihedrals)
            self.md_sa_seq.append(pb_seq)
            if fasta_file is not None:
                fasta_file.write(">{}\n".format(chain_name))
                fasta_file.write("{}\n".format(pb_seq))
    finally:
        # Close the file even if reading or assignment raises, so the
        # handle is not leaked and what was written gets flushed.
        if fasta_file is not None:
            fasta_file.close()
def test_loader_xtc(self):
    """
    Check the API trajectory loader on xtc files.

    The first and last loaded items must be labelled as frame 0 and
    frame 9 of the trajectory, and both chains must format as a
    355-atom chain.
    """
    topology_path = os.path.join(here, "test_data/barstar_md_traj.gro")
    trajectory_path = os.path.join(here, "test_data/barstar_md_traj.xtc")
    loaded = list(pbx.chains_from_trajectory(trajectory_path, topology_path))
    expected_chain = "Chain / model : 355 atoms"

    # First frame
    first_comment, first_chain = loaded[0]
    expected_first = "{0} | frame 0".format(trajectory_path)
    assert expected_first == first_comment
    assert expected_chain == format(first_chain)

    # Last frame
    last_comment, last_chain = loaded[-1]
    expected_last = "{0} | frame 9".format(trajectory_path)
    assert expected_last == last_comment
    assert expected_chain == format(last_chain)
def test_loader_xtc(self):
    """
    Check the API trajectory loader on xtc files.

    The first and last loaded items must be labelled as frame 0 and
    frame 9 of the trajectory, and both chains must format as a
    355-atom chain.
    """
    topology_path = os.path.join(here, "test_data/barstar_md_traj.gro")
    trajectory_path = os.path.join(here, "test_data/barstar_md_traj.xtc")
    loaded = list(pbx.chains_from_trajectory(trajectory_path, topology_path))
    expected_chain = "Chain / model : 355 atoms"

    # First frame
    first_comment, first_chain = loaded[0]
    expected_first = "{0} | frame 0".format(trajectory_path)
    self.assertEqual(expected_first, first_comment)
    self.assertEqual(expected_chain, format(first_chain))

    # Last frame
    last_comment, last_chain = loaded[-1]
    expected_last = "{0} | frame 9".format(trajectory_path)
    self.assertEqual(expected_last, last_comment)
    self.assertEqual(expected_chain, format(last_chain))
def pbassign_func(self):
    """
    Assign a PB sequence to every chain of the selected trajectory.

    The trajectory and topology paths are read from
    ``self.trajectory_file`` and ``self.topology_file``. The resulting
    sequences are saved through ``save_it.save_fasta`` under the base name
    of the trajectory file, then handed to ``self.count_occurrence``.
    Progress messages are appended to ``self.txt_log``.

    :return: None
    """
    # Paths currently selected for the trajectory and its topology.
    traj_path = self.trajectory_file.get()
    topo_path = self.topology_file.get()

    # PB assignment: collect (name, sequence) pairs in yield order.
    assigned = []
    for label, structure in pbx.chains_from_trajectory(
            trajectory=traj_path, topology=topo_path):
        angles = structure.get_phi_psi_angles()
        assigned.append((label, pbx.assign(angles)))
    names = [label for label, _ in assigned]
    pb_sequences = [sequence for _, sequence in assigned]

    # Report the end of the assignment in the log screen.
    self.txt_log.insert('end', ">>> PB assignment is finished" + "\n\n")

    # Save all PB sequences into one fasta file.
    save_it.save_fasta(
        input_seqs=pb_sequences,
        names=names,
        file_name=os.path.basename(traj_path))

    # Report the save in the log screen.
    self.txt_log.insert(
        'end', ">>> PB Sequences were saved into "
        "a Fasta file" + "\n\n")

    # Compute the occurrence of each PB.
    self.count_occurrence(seq=pb_sequences)
def pbassign_cli():
    """
    Command line entry point of PBassign.

    Assigns a PB sequence to every chain coming either from PDB files
    (``options.p``) or from a trajectory (``options.x`` / ``options.g``).
    Chains whose angle computation raises ``FloatingPointError`` are
    reported on stderr and skipped. The remaining sequences are written to
    ``<options.o>.PB.fasta``; no file is written when none is left.
    """
    options, pdb_name_lst = user_inputs()

    # Select the source of the chains.
    if not options.p:
        # PB assignment of a Gromacs trajectory
        chain_source = pbx.chains_from_trajectory(options.x, options.g)
    elif pdb_name_lst:
        print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        # PB assignment of PDB structures
        chain_source = pbx.chains_from_files(pdb_name_lst)
    else:
        print('Nothing to do. Good bye.')
        return

    headers = []
    pb_sequences = []
    for header, structure in chain_source:
        try:
            angles = structure.get_phi_psi_angles()
            pb_sequence = pbx.assign(angles)
        except FloatingPointError:
            print(
                "The computation of angles produced NaN. This typically means there are issues"
                " with some residues coordinates. Check your input file ({0})".
                format(header),
                file=sys.stderr)
            continue
        headers.append(header)
        pb_sequences.append(pb_sequence)

    # Nothing could be assigned: do not create an output file.
    if not headers:
        print("No output file was written")
        return

    output_path = options.o + ".PB.fasta"
    with open(output_path, 'w') as handle:
        pbx.io.write_fasta(handle, pb_sequences, headers)
    print("wrote {0}".format(output_path))
def cli(args=None):
    """Entry point of the command line interface.

    Assigns a PB sequence to every chain read from PDB files or from a
    trajectory, computes the Mutual Information matrix of these sequences,
    writes it as a csv file and, when requested, writes the derived
    interaction network as a gml file.

    Parameters
    ----------
    args : list of str, optional
        CLI arguments, intended for testing (default is None, in which
        case ``sys.argv[1:]`` is used).

    Returns
    -------
    None
        Writes a csv file and possibly a gml file.

    Raises
    ------
    SystemExit
        If no sequence could be assigned from the input.

    Notes
    -----
    Main function of the script, for use with CLI.
    """
    if not args:
        args = sys.argv[1:]
    # NOTE(review): ``args`` is not forwarded to user_inputs(), so the
    # parameter currently has no effect -- confirm whether user_inputs()
    # accepts an argument list before wiring it through.

    options, pdb_name_lst = user_inputs()
    if options.pdb:
        if pdb_name_lst:
            print("{} PDB file(s) to process".format(len(pdb_name_lst)))
        else:
            print('Nothing to do. Good bye.')
            return
        # PB assignment of PDB structures
        chains = pbx.chains_from_files(pdb_name_lst)
    else:
        # PB assignment of a Gromacs trajectory
        chains = pbx.chains_from_trajectory(options.x, options.g)

    all_comments = []
    all_sequences = []
    for comment, chain in chains:
        try:
            dihedrals = chain.get_phi_psi_angles()
            sequence = pbx.assign(dihedrals)
            all_comments.append(comment)
            all_sequences.append(sequence)
        except FloatingPointError:
            # Skip this chain but keep processing the remaining ones.
            log.error("The computation of angles produced NaN. "
                      "This typically means there are issues with "
                      "some residues coordinates. "
                      f"Check your input file ({comment})")

    # Without this guard, all_sequences[0] below fails with an IndexError
    # when the input is empty or every chain was skipped.
    if not all_sequences:
        log.error("No sequences were found, nothing to compute.")
        sys.exit(1)

    log.info(f"There are {len(all_sequences)} sequences of length "
             f"{len(all_sequences[0])}")

    log.info("Calculating the Mutual Information matrix ...")
    MI_matrix = mutual_information_matrix(all_sequences)

    # Write the matrix to a csv file.
    log.info(f"Writing the matrix as {options.output} ...")
    df = pd.DataFrame(MI_matrix)
    df.to_csv(options.output)

    # Optionally build the network and write it in GML format.
    if options.network:
        log.info("Creating a network ...")
        PB_graph = interaction_graph(MI_matrix)
        log.info(f"Writing the network as {options.network} ...")
        nx.write_gml(PB_graph, path=options.network)