def main():
    """
    Entry point: run a sampling simulation for a single coarse-grained RNA.

    Parses the command line with the module-level ``parser``, loads exactly
    one RNA (and optionally one reference RNA given via ``args.reference``),
    marks residues as externally interacting if requested, writes the input
    structure to ``input.cg`` inside the output directory and calls ``run``.

    :raises ValueError: If the RNA has fewer than 2 stems.
    :raises KeyError: If an entry of ``--externally-interacting`` is neither
                      an integer nor the name of an element of the RNA.
    """
    args = parser.parse_args()
    # Set log-level as a sideeffect
    cg, = fuc.cgs_from_args(args, rna_type="cg", enable_logging=True)
    if args.reference:
        # The loader is called with the same namespace, so the reference
        # file is substituted for the input files before loading it.
        args.rna = [args.reference]
        reference_cg, = fuc.cgs_from_args(args, rna_type="cg",
                                          enable_logging=False)
    else:
        reference_cg = None

    if args.externally_interacting:
        nts = []
        for pos in args.externally_interacting.split(","):
            try:
                nt = int(pos)
            except ValueError:
                # Not a nucleotide number, so treat the entry as an element
                # name. Might raise a KeyError, but that is fine.
                d = cg.defines[pos]
                if d:
                    nts.append(d[0])
                else:
                    log.warning(
                        "Not setting %s to externally interacting, because it contains 0 nucleotides.",
                        pos)
            else:
                nts.append(nt)
        cg.interacting_residues.extend([cg.seq.to_resid(nt) for nt in nts])
        log.info(
            "The RNA now has the following elements not perticipating in interaction energies (if presenty): %s",
            cg.interacting_elements)

    if len(list(cg.stem_iterator())) < 2:
        raise ValueError(
            "No sampling can be done for structures with fewer than 2 stems")

    with fess.directory_utils.make_outdir(args, cg) as main_dir:
        cg_stri = cg.to_cg_string()
        with open(os.path.join(main_dir, 'input.cg'), "w") as f:
            print(cg_stri, file=f)
        try:
            run(args, cg, main_dir, reference_cg)
        except BaseException as e:
            # Leave a record of the failure next to the results; the
            # exception itself is re-raised unchanged.
            with open(os.path.join(main_dir, 'exception.log'), "w") as f:
                print("Running on python {}, the following error occurred:".
                      format(sys.version), file=f)
                print("{}: {}".format(type(e).__name__, str(e)), file=f)
                print(str(traceback.format_exc()), file=f)
            raise
def main(parser):
    """
    Print the RMSD between the 2D projections of two RNA structures.

    :param parser: An argument parser. The parsed namespace must provide
                   ``directions`` (two comma-separated vectors, one per
                   RNA) and ``plot``.
    """
    args = parser.parse_args()

    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, nargs=2, rna_type="3d",
                                     enable_logging=True)

    # One projection direction per structure, given as "x,y,z" strings.
    first_dir, second_dir = (
        np.array(args.directions[idx].split(","), dtype=float)
        for idx in (0, 1)
    )
    proj1 = ftmp.Projection2D(cg1, first_dir)
    proj2 = ftmp.Projection2D(cg2, second_dir)

    def _flat_coords(proj):
        # Flatten the per-element coordinates, ordered by element name.
        return np.array([point
                         for key in sorted(proj._coords.keys())
                         for point in proj._coords[key]])

    vrs1 = _flat_coords(proj1)
    vrs2 = _flat_coords(proj2)
    print(ftms.rmsd(vrs1, vrs2))

    if args.plot:
        # Imported lazily, so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        proj1.plot(ax, line2dproperties={"color": "green"})
        proj2.plot(ax, line2dproperties={"color": "red"})
        plt.show()
def main(args):
    """
    Compare two 3D RNA structures and print the requested measures.

    If none of ``args.acc``, ``args.rmsd`` and ``args.pdb_rmsd`` is set,
    every measure that can be computed is shown. A measure that was
    requested explicitly but cannot be computed terminates the program
    with exit status 1.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, rna_type="3d",
                                     enable_logging=True)

    # No explicit selection means "show everything".
    showall = not (args.acc or args.rmsd or args.pdb_rmsd)

    if showall or args.acc:
        defines_differ = cg1.defines != cg2.defines
        if not defines_differ:
            adj = ftms.AdjacencyCorrelation(cg1)
            print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
        elif args.acc:
            # Only complain if the ACC was asked for explicitly.
            print(
                "Cannot compare two 3d structures that do not correspond to the same RNA.")
            sys.exit(1)

    if showall or args.rmsd:
        print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))

    if showall or args.pdb_rmsd:
        succeeded = pdb_rmsd(cg1, cg2)
        # If --pdb-rmsd was not given, just don't print it.
        # If it was given, we exit with non-zero exit status.
        if not succeeded and args.pdb_rmsd:
            print(
                "Cannot calculate PDB-RMSD: The two files do not contain the same chains.")
            sys.exit(1)
def main(args):
    """
    Print dot-bracket strings in which stems next to multiloops are shortened.

    For every multiloop, each stem connected to one of its 'm' elements is
    handled once: the base pair on the side facing the multiloop is replaced
    by two unpaired nucleotides ('.') in a fresh copy of the dot-bracket
    string, which is then printed.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        bg, = fuc.cgs_from_args(args, "any", enable_logging=True)

    multiloops, _ = bg.find_multiloop_loops()
    for multi in multiloops:
        handled_stems = set()
        # Only the multiloop segments themselves ('m...') are of interest.
        for m in (elem for elem in multi if elem[0] == 'm'):
            for to_shorten in list(bg.edges[m]):
                if to_shorten in handled_stems:
                    continue
                handled_stems.add(to_shorten)

                # Every output line starts from the unmodified structure.
                db = list(bg.to_dotbracket_string())

                # get the side of the stem which is connected to the
                # multiloop
                s1b, s1e = bg.get_sides(to_shorten, m)

                # the nucleotides that need to be changed (1-based)
                to_change = bg.get_side_nucleotides(to_shorten, s1b)
                for nt in (to_change[0], to_change[1]):
                    db[nt - 1] = '.'
                print("".join(db))
def main():
    """
    Entry point: run a sampling simulation for one or more RNAs.

    Every input RNA is written to ``input<i>.cg`` in the output directory
    before ``run`` is called. If ``run`` fails, the error is written to
    ``exception.log`` in the same directory and re-raised.

    :raises ValueError: If any RNA has fewer than 2 stems.
    """
    args = parser.parse_args()
    # Set log-level as a sideeffect
    cgs = fuc.cgs_from_args(args, rna_type="cg", enable_logging=True)

    for cg in cgs:
        stem_count = sum(1 for _ in cg.stem_iterator())
        if stem_count < 2:
            raise ValueError(
                "No sampling can be done for structures with fewer than 2 stems"
            )

    with fess.directory_utils.make_outdir(args, cgs[0]) as main_dir:
        for index, cg in enumerate(cgs):
            cg_stri = cg.to_cg_string()
            input_path = os.path.join(main_dir, 'input{}.cg'.format(index))
            with open(input_path, "w") as f:
                print(cg_stri, file=f)
        try:
            run(args, cgs, main_dir)
        except BaseException as e:
            log_path = os.path.join(main_dir, 'exception.log')
            with open(log_path, "w") as f:
                header = "Running on python {}, the following error occurred:".format(
                    sys.version)
                print(header, file=f)
                print("{}: {}".format(type(e).__name__, str(e)), file=f)
                print(str(traceback.format_exc()), file=f)
            raise
def main(args):
    """
    Compare two 3D RNA structures and print the requested measures.

    If none of ``args.acc``, ``args.rmsd`` and ``args.pdb_rmsd`` is set,
    every measure that can be computed is shown. A measure that was
    requested explicitly but cannot be computed terminates the program
    with exit status 1.

    :param args: The parsed command-line arguments.
    """
    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, rna_type="3d",
                                     enable_logging=True)

    # No explicit selection means "show everything".
    showall = not (args.acc or args.rmsd or args.pdb_rmsd)

    if showall or args.acc:
        defines_differ = cg1.defines != cg2.defines
        if not defines_differ:
            adj = ftms.AdjacencyCorrelation(cg1)
            print("ACC:\t{:.3f}".format(ftms.mcc(adj.evaluate(cg2))))
        elif args.acc:
            # Only complain if the ACC was asked for explicitly.
            print(
                "Cannot compare two 3d structures that do not correspond to the same RNA."
            )
            sys.exit(1)

    if showall or args.rmsd:
        print("RMSD:\t{:.3f}".format(ftms.cg_rmsd(cg1, cg2)))

    if showall or args.pdb_rmsd:
        succeeded = pdb_rmsd(cg1, cg2)
        # If --pdb-rmsd was not given, just don't print it.
        # If it was given, we exit with non-zero exit status.
        if not succeeded and args.pdb_rmsd:
            print(
                "Cannot calculate PDB-RMSD: The two files do not contain the same chains."
            )
            sys.exit(1)
def main(parser):
    """
    Print the RMSD between the 2D projections of two RNA structures.

    :param parser: An argument parser. The parsed namespace must provide
                   ``directions`` (two comma-separated vectors, one per
                   RNA) and ``plot``.
    """
    args = parser.parse_args()

    with fuc.hide_traceback():
        cg1, cg2 = fuc.cgs_from_args(args, rna_type="3d",
                                     enable_logging=True)

    # One projection direction per structure, given as "x,y,z" strings.
    first_dir, second_dir = (
        np.array(args.directions[idx].split(","), dtype=float)
        for idx in (0, 1)
    )
    proj1 = ftmp.Projection2D(cg1, first_dir)
    proj2 = ftmp.Projection2D(cg2, second_dir)

    def _flat_coords(proj):
        # Flatten the per-element coordinates, ordered by element name.
        return np.array([point
                         for key in sorted(proj._coords.keys())
                         for point in proj._coords[key]])

    vrs1 = _flat_coords(proj1)
    vrs2 = _flat_coords(proj2)
    print(ftms.rmsd(vrs1, vrs2))

    if args.plot:
        # Imported lazily, so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots()
        proj1.plot(ax, line2dproperties={"color": "green"})
        proj2.plot(ax, line2dproperties={"color": "red"})
        plt.show()
def main():
    """
    Entry point: create the training data for the A-minor classifier and
    tune the model on it.

    Builds the command-line parser, loads all input RNAs, writes the
    geometry file (``--trainingsdata-out``), tunes the model on that file
    and stores the resulting hyper-parameters as JSON
    (``--model-params-out``).
    """
    parser = fuc.get_rna_input_parser(
        "Train a classifier for A-Minior interactions.",
        nargs="+", rna_type="only_cg")
    parser.add_argument("--fr3d-result", type=str, required=True,
                        help="A file containing the FR3D output")
    parser.add_argument("--fr3d-query", type=str,
                        help="Add this string describing the FR3D query "
                             "as a comment to the trainingsdata-out file")
    parser.add_argument("--chain-id-mapping-dir", type=str,
                        help="If you use PDB-bundles, this directory "
                             "needs to hold all chain-id-mapping.txt files.")
    parser.add_argument("--trainingsdata-out", type=str,
                        default="forgi/threedee/data/aminor_geometries.csv",
                        help="File that will be written for the geometries "
                             "of interactions and non-interactions.")
    parser.add_argument("--model-params-out", type=str,
                        default="forgi/threedee/data/aminor_params.json",
                        help="File that will be written for the "
                             "model's hyper-parameters.")
    parser.add_argument("--test-set", type=str,
                        help="':'-separated PDB-ids"
                             " for the test-set.")
    # The adjacent literals are concatenated verbatim, so each sentence
    # has to carry its own trailing space.
    parser.add_argument("--train-set", type=str,
                        help="':'-separated PDB-ids for the train-set. "
                             "Note: This is only used for cross-validation. "
                             "The final model will be trained on all the data.")
    args = parser.parse_args()

    cgs, cg_filenames = fuc.cgs_from_args(args, rna_type="only_cg",
                                          return_filenames=True)
    ftcta.create_geometry_file(args.trainingsdata_out, cgs, cg_filenames,
                               args.fr3d_result, args.chain_id_mapping_dir,
                               args.fr3d_query)
    # The tuning step reads back the geometry file written above.
    hyper_params = ftcta.tune_model(args.trainingsdata_out,
                                    args.train_set, args.test_set)
    with open(args.model_params_out, "w") as f:
        json.dump(hyper_params, f)
def __init__(self, ref_fn, name, args):
    """
    Load the reference structure for this collector.

    If the reference cannot be loaded, the error is logged, ``self.silent``
    is set to True and no further attributes are initialised.

    :param ref_fn: A file name of an RNA.
    :param name: If truthy, the header becomes ``["stem_rmsd_of_" + name]``.
    :param args: The argument namespace passed on to the RNA loader.
                 Note that its ``rna`` attribute is overwritten
                 with ``[ref_fn]``.
    """
    super(StemRMSD, self).__init__()
    try:
        args.rna = [ref_fn]
        loaded = fuc.cgs_from_args(args, rna_type="cg", enable_logging=False)
        # Exactly one RNA is expected; anything else raises here.
        reference_cg, = loaded
    except Exception:
        log.exception("Cannot create StemRMSD.")
        self.silent = True
    else:
        self._reference = reference_cg
        if name:
            self.header = ["stem_rmsd_of_" + name]
        self.has_logged = False
def main(args):
    """
    Reconstruct all-atom structures for all coarse-grained RNAs in ``args``.

    Before the reconstruction starts, the PDB/CIF files that are referenced
    by more than one input structure are preloaded into the reconstructor
    (at most the 250 most frequently used ones). Errors during the
    reconstruction of a single RNA are logged and do not stop the
    processing of the remaining RNAs.

    :param args: The parsed command-line arguments.
    """
    rec = fbr.Reconstructor(args.source_pdb_dir, args.source_cg_dir,
                            args.server)
    with fuc.hide_traceback():  # Applies only to WrongFileFormat
        cgs, fns = fuc.cgs_from_args(args, rna_type="only_cg",
                                     enable_logging=True,
                                     return_filenames=True)

    # Preprocessing: count in how many input structures each file is used.
    cif_ext = ".cif"
    most_common_pdbs = collections.Counter()
    for cg in cgs:
        sm = fbm.SpatialModel(cg)
        sm.load_sampled_elems(None)
        curr_fns = set()
        for stat in sm.elem_defs.values():
            pdb_basename = stat.pdb_name.split(":")[0]
            # Everything before the last "_"-separated field is the file stem.
            file_stem = "_".join(pdb_basename.split("_")[:-1])
            pdb_filename = op.expanduser(
                op.join(rec.pdb_library_path, file_stem + cif_ext))
            try:
                with open(pdb_filename):
                    pass
            except IOError:
                # Fall back to the .pdb file. Only the ".cif" suffix is
                # removed: str.rstrip(".cif") would strip any trailing
                # '.', 'c', 'i' or 'f' characters and corrupt the name.
                pdb_filename = pdb_filename[:-len(cif_ext)] + ".pdb"
            curr_fns.add(pdb_filename)
        # Each file counts once per structure, however many elements use it.
        most_common_pdbs.update(curr_fns)

    for fn, count in most_common_pdbs.most_common(250):
        if count == 1:
            # most_common() is sorted by count, so nothing shared follows.
            break
        print("Preloading {}, used {} times".format(fn, count))
        rec.get_pdb(fn, True)
    print("Preloading of most common PDBs done")

    logging.getLogger("forgi").setLevel(logging.ERROR)
    logging_exceptions.config_from_args(args)

    for i, cg in enumerate(cgs):
        try:
            fn = fns[i]
            reconstruct(cg, fn, args, rec)
        except Exception as e:
            logging_exceptions.log_exception(e)
            log.exception("During reconstruction of cg %s, an error occurred: %s", fn, e)


# NOTE(review): this assignment trails main() in the whitespace-collapsed
# original and its scope is ambiguous; kept at module level in case other
# code uses it as a global counter -- confirm.
i = 0
def main():
    """
    Entry point: create the training data for the A-minor classifier and
    tune the model on it.

    Builds the command-line parser, loads all input RNAs, writes the
    geometry file (``--trainingsdata-out``), tunes the model on that file
    and stores the resulting hyper-parameters as JSON
    (``--model-params-out``).
    """
    parser = fuc.get_rna_input_parser(
        "Train a classifier for A-Minior interactions.",
        nargs="+", rna_type="only_cg")
    parser.add_argument("--fr3d-result", type=str, required=True,
                        help="A file containing the FR3D output")
    parser.add_argument("--fr3d-query", type=str,
                        help="Add this string describing the FR3D query "
                             "as a comment to the trainingsdata-out file")
    parser.add_argument("--chain-id-mapping-dir", type=str,
                        help="If you use PDB-bundles, this directory "
                             "needs to hold all chain-id-mapping.txt files.")
    parser.add_argument("--trainingsdata-out", type=str,
                        default="forgi/threedee/data/aminor_geometries.csv",
                        help="File that will be written for the geometries "
                             "of interactions and non-interactions.")
    parser.add_argument("--model-params-out", type=str,
                        default="forgi/threedee/data/aminor_params.json",
                        help="File that will be written for the "
                             "model's hyper-parameters.")
    parser.add_argument("--test-set", type=str,
                        help="':'-separated PDB-ids"
                             " for the test-set.")
    # The adjacent literals are concatenated verbatim, so each sentence
    # has to carry its own trailing space.
    parser.add_argument("--train-set", type=str,
                        help="':'-separated PDB-ids for the train-set. "
                             "Note: This is only used for cross-validation. "
                             "The final model will be trained on all the data.")
    args = parser.parse_args()

    cgs, cg_filenames = fuc.cgs_from_args(args, rna_type="only_cg",
                                          return_filenames=True)
    ftcta.create_geometry_file(args.trainingsdata_out, cgs, cg_filenames,
                               args.fr3d_result, args.chain_id_mapping_dir,
                               args.fr3d_query)
    # The tuning step reads back the geometry file written above.
    hyper_params = ftcta.tune_model(args.trainingsdata_out,
                                    args.train_set, args.test_set)
    with open(args.model_params_out, "w") as f:
        json.dump(hyper_params, f)
#parser.add_argument('--ame-pdb-id-file', help='For the AMinor energy, only consider pdb ids from this file', type=str) parser.add_argument('--precalculated-ame-orient', default=False, action="store_true") return parser parser = get_parser() if __name__ == "__main__": args = parser.parse_args() #Logging logging.basicConfig( format="%(levelname)s:%(name)s:%(funcName)s[%(lineno)d]: %(message)s") logging.captureWarnings(True) cgs = fuc.cgs_from_args(args, "+") log = logging.getLogger(__name__) log.info(fau.get_version_string()) if args.use_subgraphs is None: use_subgraphs = max(1, math.ceil(10000 / len(cg_files))) else: use_subgraphs = args.use_subgraphs if not args.plot_only: #Generating the files ## ROG if args.rog_target_file: fbe.RadiusOfGyrationEnergy.generate_target_distribution(
def main(parser):
    """
    Print atom positions relative to the coordinate system of their element.

    For every element of every input RNA, the positions of the backbone,
    chi-torsion and side-chain atoms of each residue are transformed into
    the element's own coordinate system and printed as
    ``<identifier>:<x>,<y>,<z>``. The identifier consists of the element
    type, the element dimensions, the connection type, the position of the
    residue within the element and the atom name.

    :param parser: The argument parser used to read the command line.
    """
    args = parser.parse_args()

    poss = c.defaultdict(list)
    sources = c.defaultdict(list)

    cgs = fuc.cgs_from_args(args, rna_type="pdb", enable_logging=True)
    for rna_index, cg in enumerate(cgs):
        if len(list(cg.stem_iterator())) == 0:
            log.warning("Skipping RNA %s (%s): no stems",
                        rna_index, cg.pdb_name)
            continue

        for d in cg.defines.keys():
            if np.allclose(cg.coords[d][0], cg.coords[d][1]):
                log.warning(
                    "Skipping element %s of RNA %s (%s): degenerate coordinates.",
                    d, rna_index, cg.pdb_name)
                continue

            origin, basis = ftug.element_coord_system(cg, d)

            if d[0] == 'i' or d[0] == 'm':
                conn = cg.connections(d)
                conn_type = cg.connection_type(d, conn)
            else:
                conn_type = 0

            # A separate name for the residue position keeps the RNA index
            # intact for the warnings above; enumerate() replaces the
            # Python-2-only itertools.izip(count(), ...).
            for res_pos, r in enumerate(cg.define_residue_num_iterator(d)):
                # add only the base atoms which are relevant to the calculation
                # of the chi torsion angle
                seq_id = cg.seq_ids[r - 1]
                residue = cg.chains[seq_id.chain][seq_id.resid]
                resname = residue.resname.strip()

                if resname not in ftup.chi_torsion_atoms:
                    print("Unknown nucleotide name:", resname, file=sys.stderr)
                    continue

                atoms = ftup.nonsidechain_atoms + \
                    ftup.chi_torsion_atoms[resname][-2:]
                scatoms = ftup.side_chain_atoms[resname]
                for aname in atoms + scatoms:
                    try:
                        a = residue[aname]
                    except KeyError:
                        # missing an atom
                        continue

                    # The C1'->B1 and B1->B2 vectors define the plane of the base
                    # The O4'->C1'->B1->B2 sequence defines the torsion
                    # angle chi
                    if aname == ftup.chi_torsion_atoms[resname][-2]:
                        aname = 'B1'
                    elif aname == ftup.chi_torsion_atoms[resname][-1]:
                        aname = 'B2'
                    elif aname in scatoms:
                        aname = resname + "." + aname

                    avec = a.get_vector().get_array()
                    atom_pos = ftuv.change_basis(avec - origin, basis,
                                                 ftuv.standard_basis)
                    identifier = "%s %s %d %d %s" % (
                        d[0],
                        " ".join(map(str, cg.get_node_dimensions(d))),
                        conn_type, res_pos, aname)
                    poss[identifier] += [atom_pos]
                    sources[identifier] += [d]

                    print("{}:{}".format(identifier,
                                         ",".join(map(str, atom_pos))))
out_column = [] for i in range(len(data[sorting_column])): if only_for_col is None or data[only_for_col][i]: out_column.append(sorted_vals.index(data[sorting_column][i]) + 1) else: out_column.append(0) log.info("number_by column is %s, len(data[%s])=%s)", out_column, sorting_column, len(data[sorting_column])) return out_column parser = generateParser() if __name__ == "__main__": args = parser.parse_args() cgs, filenames = fuc.cgs_from_args(args, "any", enable_logging=True, return_filenames=True) data = defaultdict(list) if args.distances: dist_pairs = str(args.distances).split(str(':')) dist_pairs = [x.split(",") for x in dist_pairs] else: dist_pairs = [] if args.angles: angle_pairs = str(args.angles).split(str(":")) angle_pairs = [x.split(",") for x in angle_pairs] else: angle_pairs = [] for i, cg in enumerate(cgs):
"The file-name will be the RNA's name (if present), otherwise 'rna001' etc.") parser.add_argument("--filename", type=str, help="If this is present, --to-file will automatically be true." "A target filename (or path) without extention. " "If it is a filename, use the given filename instead of the RNA's name. " "If more than one input-RNA is present, appends automatically a increasing number." "If it is a directory, create files in this directory.") parser.add_argument("-f", "--force", action="store_true", help="Overwrite files, if they already exist. Note: In case of race conditions, " "files could be overwritten even if this flag is not provided.") if __name__ == "__main__": args = parser.parse_args() with fuc.hide_traceback(): cgs = fuc.cgs_from_args( args, rna_type=FILETYPES[args.target_type].rna_type) if args.filename: args.to_file = True if os.path.isdir(args.filename): directory = args.filename filename = None args.filename = None else: directory, filename = os.path.split(args.filename) else: filename = None directory = "" for i, cg in enumerate(cgs): if not args.to_file and i > 0: print("\n\n========================================\n\n")
def main():
    """
    Entry point: find and classify pseudoknots in the given RNA structures.

    For every input RNA the pseudoknots are identified and classified. The
    class counts are printed and collected in two tables, which are written
    as tab-separated files to ``pseudoknot_identification_genus2.csv`` and
    ``pseudoknot_identification_extended_genus2.csv`` in the current
    directory. RNAs that raise a GraphIntegrityError or a
    GraphConstructionError are logged and skipped; any other error is
    logged and re-raised.
    """
    parser = fuc.get_rna_input_parser(
        "Find pseudoknots in RNA structures, "
        "classify them into shapes and analyze "
        "their 3D architecturre.", "+",
        parser_kwargs={"conflict_handler": "resolve"})
    # conflict_handler="resolve" lets this hidden flag replace an option of
    # the same name; its value is forced to True after parsing.
    parser.add_argument("--pseudoknots", action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("--outfile-mode", choices=["w", "a"], default='w',
                        help="Overwrite ('w') or append ('a') to output file")
    parser.add_argument("--minlength", type=int,
                        help="Minimum length of each stem. "
                             "Stems with fewer base-pairs are treated as unpaired.",
                        default=2)
    args = parser.parse_args()
    args.pseudoknots = True
    rnas, filenames = fuc.cgs_from_args(args, rna_type="any",
                                        return_filenames=True,
                                        skip_errors=True)

    # variables for statistics
    unfold = 0
    without_pk = 0
    with_pk = 0
    pseudoknot_dataset = []
    pseudoknot_dataset_extended = defaultdict(list)

    for pos, rna in enumerate(rnas):
        try:
            # The running counters are passed in and returned updated.
            without_pk, with_pk, unfold, pseudoknots = identification_pseudoknot(
                rna, without_pk, with_pk, unfold, args.minlength)

            # count types of pseudoknots
            pk_classes, other = classify_pseudoknots(pseudoknots)
            print("pk_classes:")
            for key, pks in sorted(pk_classes.items()):
                print("{:<17s} {}\t{}".format(key, len(pks),
                                              ", ".join(map(str, pks))))
            print("other:" + "\t" + ", ".join(map(str, other)))

            filename = str(filenames[pos]).split("/")[-1]

            entry = {}
            for key, pks in pk_classes.items():
                entry[key] = len(pks)
            entry["other"] = len(other)
            entry["PK_other_structures"] = ",".join(map(str, other))
            entry["filename"] = filename
            entry["rnaname"] = rna.name
            print(filename)
            pseudoknot_dataset.append(entry)

            # Pseudoknots are numbered consecutively within one RNA.
            pk_id = 0
            for key, pks in pk_classes.items():
                for pk in pks:
                    pk_id += 1
                    extend_pk_description(pseudoknot_dataset_extended,
                                          filename, key, rna, pk, pk_id)
            for pk in other:
                pk_id += 1
                extend_pk_description(pseudoknot_dataset_extended,
                                      filename, "other", rna, pk, pk_id)
        except GraphIntegrityError:
            log.exception("Ignoring RNA %s; GraphIntegrityError", rna.name)
        except GraphConstructionError:
            log.exception("Ignoring RNA %s; GraphConstructionError", rna.name)
        except Exception:
            log.error("Error processing %s", rna.name)
            raise

    # When appending, the header is omitted so it is not repeated.
    write_header = args.outfile_mode != "a"
    df1 = pandas.DataFrame(pseudoknot_dataset)
    df1.to_csv("pseudoknot_identification_genus2.csv",
               mode=args.outfile_mode, header=write_header, sep="\t")
    df2 = pandas.DataFrame(pseudoknot_dataset_extended)
    df2.to_csv("pseudoknot_identification_extended_genus2.csv",
               mode=args.outfile_mode, header=write_header, sep="\t")

    print("Structures with Pseudoknots: {}".format(with_pk))
    print("Structures without Pseudoknots: {}".format(without_pk))
    print("Structures unfold: {}".format(unfold))
"--fragment-dir", type=str, help= "A directory, where pdb fragments will be stored. If None is given, they will not be stored." ) return parser parser = generateParser() if __name__ == "__main__": next_id = defaultdict(int) args = parser.parse_args() cgs, filenames = fuc.cgs_from_args(args, "+", "pdb", enable_logging=True, return_filenames=True, skip_errors=True) for cg, fn in zip(cgs, filenames): if sys.stderr.isatty(): print(cg.name, file=sys.stderr) cg.add_all_virtual_residues() for elem in cg.defines: if elem in cg.incomplete_elements: continue base_name = "{}:{}_".format(cg.name, elem[0]) idnr = next_id[base_name] next_id[base_name] += 1 name = base_name + str(idnr) if args.fragment_dir:
sorted_vals = list(sorted(set(df[sorting_column]))) out_column = [] for i in range(len(data[sorting_column])): if only_for_col is None or data[only_for_col][i]: out_column.append(sorted_vals.index(data[sorting_column][i]) + 1) else: out_column.append(0) log.info("number_by column is %s, len(data[%s])=%s)", out_column, sorting_column, len( data[sorting_column])) return out_column parser = generateParser() if __name__ == "__main__": args = parser.parse_args() cgs, filenames = fuc.cgs_from_args( args, "any", enable_logging=True, return_filenames=True) data = defaultdict(list) if args.distances: dist_pairs = str(args.distances).split(str(':')) dist_pairs = [x.split(",") for x in dist_pairs] else: dist_pairs = [] if args.angles: angle_pairs = str(args.angles).split(str(":")) angle_pairs = [x.split(",") for x in angle_pairs] else: angle_pairs = [] for i, cg in enumerate(cgs): file_num = i + 1
"A target filename (or path) without extention. " "If it is a filename, use the given filename instead of the RNA's name. " "If more than one input-RNA is present, appends automatically a increasing number." "If it is a directory, create files in this directory.") parser.add_argument( "-f", "--force", action="store_true", help= "Overwrite files, if they already exist. Note: In case of race conditions, " "files could be overwritten even if this flag is not provided.") if __name__ == "__main__": args = parser.parse_args() with fuc.hide_traceback(): cgs = fuc.cgs_from_args(args, rna_type=FILETYPES[args.target_type].rna_type) if args.filename: args.to_file = True if os.path.isdir(args.filename): directory = args.filename filename = None args.filename = None else: directory, filename = os.path.split(args.filename) else: filename = None directory = "" for i, cg in enumerate(cgs): if not args.to_file and i > 0: print("\n\n========================================\n\n")
def main(args):
    """
    Visualise one or more 3D RNA structures in PyMOL.

    Writes the coarse-grained representation and, for RNAs with chains, the
    all-atom structures to files, builds a PyMOL command script that loads
    them and defines one selection per coarse-grained element, and then
    starts PyMOL (without GUI if ``args.batch`` is set) and waits for it
    to finish.

    :param args: The parsed command-line arguments.
    :raises ValueError: If an entry of ``--labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    if args.labels:
        labels = {}
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}."
                    .format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output gets switched on for the explicit labels; the
            # defaultdict maps every other element to an empty label.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True
    else:
        labels = {}

    color_modifier = 1.0
    log.info("Visualizing {} rnas".format(len(rnas)))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        # The modifier shrinks for every further RNA.
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")

        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if rna.chains:
                obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
                fn = os.path.join(tmpdir, obj_name + ".cif")
                pdb_fns.append(fn)
                ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
                for d in rna.defines:
                    resids = list(
                        rna.define_residue_num_iterator(d, seq_ids=True))
                    if resids:
                        chains = {r.chain for r in resids}
                        sel = []
                        for c in chains:
                            sel.append(
                                "( %{} and chain {} and resi {}) ".format(
                                    obj_name, c, "+".join(
                                        map(str,
                                            (r.resid[1] for r in resids)))))
                        selections += "select {}, ".format(
                            d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'

        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)

                # The original referenced an undefined name `cg` here;
                # the element is now looked up in every loaded RNA.
                for rna in rnas:
                    if constraint not in rna.defines:
                        log.warning("Element %s is not part of RNA %s",
                                    constraint, rna.name)
                        continue
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        # Residue number, accessed the same way as for the
                        # selections above.
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        #pymol_cmd += 'orient\n'
        pymol_cmd += selections

        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
            #pymol_cmd += 'quit\n'

        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # PyMOL has to run while the temporary directory still exists.
        if args.batch:
            p = sp.Popen(['pymol', '-cq'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            p = sp.Popen(['pymol'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
def main():
    """
    Entry point: find and classify pseudoknots in the given RNA structures.

    For every input RNA the pseudoknots are identified and classified. The
    class counts are printed and collected in two tables, which are written
    as tab-separated files to ``pseudoknot_identification_genus2.csv`` and
    ``pseudoknot_identification_extended_genus2.csv`` in the current
    directory. RNAs that raise a GraphIntegrityError or a
    GraphConstructionError are logged and skipped; any other error is
    logged and re-raised.
    """
    parser = fuc.get_rna_input_parser(
        "Find pseudoknots in RNA structures, "
        "classify them into shapes and analyze "
        "their 3D architecturre.", "+",
        parser_kwargs={"conflict_handler": "resolve"})
    # conflict_handler="resolve" lets this hidden flag replace an option of
    # the same name; its value is forced to True after parsing.
    parser.add_argument("--pseudoknots", action="store_true",
                        help=argparse.SUPPRESS)
    parser.add_argument("--outfile-mode", choices=["w", "a"], default='w',
                        help="Overwrite ('w') or append ('a') to output file")
    parser.add_argument("--minlength", type=int,
                        help="Minimum length of each stem. "
                             "Stems with fewer base-pairs are treated as unpaired.",
                        default=2)
    args = parser.parse_args()
    args.pseudoknots = True
    rnas, filenames = fuc.cgs_from_args(args, rna_type="any",
                                        return_filenames=True,
                                        skip_errors=True)

    # variables for statistics
    unfold = 0
    without_pk = 0
    with_pk = 0
    pseudoknot_dataset = []
    pseudoknot_dataset_extended = defaultdict(list)

    for pos, rna in enumerate(rnas):
        try:
            # The running counters are passed in and returned updated.
            without_pk, with_pk, unfold, pseudoknots = identification_pseudoknot(
                rna, without_pk, with_pk, unfold, args.minlength)

            # count types of pseudoknots
            pk_classes, other = classify_pseudoknots(pseudoknots)
            print("pk_classes:")
            for key, pks in sorted(pk_classes.items()):
                print("{:<17s} {}\t{}".format(key, len(pks),
                                              ", ".join(map(str, pks))))
            print("other:" + "\t" + ", ".join(map(str, other)))

            filename = str(filenames[pos]).split("/")[-1]

            entry = {}
            for key, pks in pk_classes.items():
                entry[key] = len(pks)
            entry["other"] = len(other)
            entry["PK_other_structures"] = ",".join(map(str, other))
            entry["filename"] = filename
            entry["rnaname"] = rna.name
            print(filename)
            pseudoknot_dataset.append(entry)

            # Pseudoknots are numbered consecutively within one RNA.
            pk_id = 0
            for key, pks in pk_classes.items():
                for pk in pks:
                    pk_id += 1
                    extend_pk_description(pseudoknot_dataset_extended,
                                          filename, key, rna, pk, pk_id)
            for pk in other:
                pk_id += 1
                extend_pk_description(pseudoknot_dataset_extended,
                                      filename, "other", rna, pk, pk_id)
        except GraphIntegrityError:
            log.exception("Ignoring RNA %s; GraphIntegrityError", rna.name)
        except GraphConstructionError:
            log.exception("Ignoring RNA %s; GraphConstructionError", rna.name)
        except Exception:
            log.error("Error processing %s", rna.name)
            raise

    # When appending, the header is omitted so it is not repeated.
    write_header = args.outfile_mode != "a"
    df1 = pandas.DataFrame(pseudoknot_dataset)
    df1.to_csv("pseudoknot_identification_genus2.csv",
               mode=args.outfile_mode, header=write_header, sep="\t")
    df2 = pandas.DataFrame(pseudoknot_dataset_extended)
    df2.to_csv("pseudoknot_identification_extended_genus2.csv",
               mode=args.outfile_mode, header=write_header, sep="\t")

    print("Structures with Pseudoknots: {}".format(with_pk))
    print("Structures without Pseudoknots: {}".format(without_pk))
    print("Structures unfold: {}".format(unfold))
def main(args):
    """
    Visualise one or more 3D RNA structures in PyMOL.

    Writes the coarse-grained representation and, for RNAs with chains, the
    all-atom structures to files, builds a PyMOL command script that loads
    them and defines one selection per coarse-grained element, and then
    starts PyMOL (without GUI if ``args.batch`` is set) and waits for it
    to finish.

    :param args: The parsed command-line arguments.
    :raises ValueError: If an entry of ``--labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    if args.labels:
        labels = {}
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}.".format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output gets switched on for the explicit labels; the
            # defaultdict maps every other element to an empty label.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True
    else:
        labels = {}

    color_modifier = 1.0
    log.info("Visualizing {} rnas".format(len(rnas)))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        # The modifier shrinks for every further RNA.
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")

        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if rna.chains:
                obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
                fn = os.path.join(tmpdir, obj_name + ".cif")
                pdb_fns.append(fn)
                ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
                for d in rna.defines:
                    resids = list(
                        rna.define_residue_num_iterator(d, seq_ids=True))
                    if resids:
                        chains = {r.chain for r in resids}
                        sel = []
                        for c in chains:
                            sel.append("( %{} and chain {} and resi {}) ".format(
                                obj_name, c,
                                "+".join(map(str,
                                             (r.resid[1] for r in resids)))))
                        selections += "select {}, ".format(
                            d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'

        if args.only_elements is not None:
            pymol_cmd += "hide all\n"

            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)

                # The original referenced an undefined name `cg` here;
                # the element is now looked up in every loaded RNA.
                for rna in rnas:
                    if constraint not in rna.defines:
                        log.warning("Element %s is not part of RNA %s",
                                    constraint, rna.name)
                        continue
                    for r in rna.define_residue_num_iterator(constraint,
                                                             seq_ids=True):
                        # Residue number, accessed the same way as for the
                        # selections above.
                        resnum = r.resid[1]
                        pymol_cmd += "show sticks, resi %r\n" % (resnum)
                        pymol_cmd += "color %s, resi %r\n" % (color, resnum)

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        #pymol_cmd += 'orient\n'
        pymol_cmd += selections

        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)
            #pymol_cmd += 'quit\n'

        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # PyMOL has to run while the temporary directory still exists.
        if args.batch:
            p = sp.Popen(['pymol', '-cq'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            p = sp.Popen(['pymol'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
import logging def get_parser(): parser = fuc.get_rna_input_parser("Create stats", nargs='+', rna_type="only_cg") return parser parser = get_parser() if __name__ == "__main__": next_id = defaultdict(int) args = parser.parse_args() cgs = fuc.cgs_from_args(args, "only_cg") for cg in cgs: if sys.stderr.isatty(): print(cg.name, file=sys.stderr) for elem in cg.defines.keys(): if elem in cg.incomplete_elements: print("Skipping element", elem, file=sys.stderr) continue base_name = "{}:{}_".format(cg.name, elem[0]) for stat in cg.get_stats(elem): idnr = next_id[base_name] next_id[base_name] += 1 name = base_name + str(idnr) stat.pdb_name = name if elem.startswith("m"): try: