def test_dssr_backslash_in_filename(self):
    """
    Check that a backslash in the input path does not break DSSR annotation.

    DSSR puts the input filename in the JSON, which makes the JSON invalid
    if a backslash is in it. The DSSR JSON is patched before parsing, so
    loading such a file must not raise.
    """
    with make_temp_directory() as tmp_root:
        # On Windows, "bla" is a directory and the backslash is a path
        # separator; on other operating systems the backslash is simply
        # part of the filename.
        pdb_path = os.path.join(tmp_root, "bla\\something.pdb")
        parent_dir = os.path.dirname(pdb_path)
        # On Windows this creates the directory "bla"; elsewhere the parent
        # is the temporary directory itself, which already exists.
        try:
            os.makedirs(parent_dir)
        except OSError:
            # Directory exists
            pass
        shutil.copy('test/forgi/threedee/data/1y26.pdb', pdb_path)
        try:
            # Loading must succeed without raising any error.
            cg, = ftmc.CoarseGrainRNA.from_pdb(pdb_path,
                                               annotation_tool="DSSR")
        except ftmc.AnnotationToolNotInstalled:
            self.skipTest("This Test requires DSSR")
        self.check_graph_integrity(cg)
        self.assertGreater(len(cg.defines), 2)
def test_dssr_backslash_in_filename(self):
    """
    Regression test for input paths that contain a backslash.

    DSSR echoes the input filename into its JSON output. A backslash in
    that name makes the JSON invalid, so the DSSR JSON is patched before
    it is parsed. This test only verifies that loading works.
    """
    with make_temp_directory() as workdir:
        # Windows treats "bla" as a directory (the backslash separates path
        # components); on other systems the backslash belongs to the
        # filename itself.
        target = os.path.join(workdir, "bla\\something.pdb")
        target_dir = os.path.split(target)[0]
        try:
            # Creates "bla" on Windows, fails harmlessly elsewhere.
            os.makedirs(target_dir)
        except OSError:
            # Directory exists
            pass
        shutil.copy('test/forgi/threedee/data/1y26.pdb', target)
        try:
            # The call itself must not raise.
            cg, = ftmc.CoarseGrainRNA.from_pdb(
                target, annotation_tool="DSSR")
        except ftmc.AnnotationToolNotInstalled:
            self.skipTest("This Test requires DSSR")
        else:
            self.check_graph_integrity(cg)
            self.assertGreater(len(cg.defines), 2)
def main(args):
    """
    Show one or more coarse-grained RNA structures in PyMOL.

    The coarse-grained representation is written as a PyMOL script, the
    chains of every RNA that has 3D chains are written as mmCIF files, and
    PyMOL is started with a generated ``command.pml`` file.

    :param args: The parsed command-line arguments. This function reads the
                 attributes ``align``, ``labels``, ``pymol_file``,
                 ``only_elements``, ``output`` and ``batch`` and forwards
                 the whole object to ``fuc.cgs_from_args`` and
                 ``pymol_printer_from_args``.
    :raises ValueError: If an entry of ``args.labels`` is not of the form
                        ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    if args.labels:
        labels = {}
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}.".format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Text output was not requested: show only the explicitly given
            # labels and an empty string for every other element.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True
    else:
        labels = {}

    color_modifier = 1.0
    log.info("Visualizing %s rnas", len(rnas))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        # Every further structure is drawn with a reduced color modifier.
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selections = ""
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                # Unique chain ids in order of first appearance.
                chain_ids = []
                for r in resids:
                    if r.chain not in chain_ids:
                        chain_ids.append(r.chain)
                sel = []
                for c in chain_ids:
                    # Only list the residues that belong to chain c.
                    # Listing all residues for every chain would also
                    # select equally numbered residues of other chains.
                    resnums = "+".join(
                        str(r.resid[1]) for r in resids if r.chain == c)
                    sel.append("( %{} and chain {} and resi {}) ".format(
                        obj_name, c, resnums))
                selections += "select {}, ".format(
                    d + "_" + obj_name) + " or ".join(sel) + "\n"

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'

        if args.only_elements is not None:
            pymol_cmd += "hide all\n"
            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)
                # Consider every loaded RNA (the name `cg` used here before
                # was never defined in this function) and skip structures
                # that do not contain the requested element.
                for rna in rnas:
                    if constraint not in rna.defines:
                        continue
                    for r in rna.define_residue_num_iterator(
                            constraint, seq_ids=True):
                        # r.resid[1] is the residue number, as used for the
                        # selections above.
                        pymol_cmd += "show sticks, resi %r\n" % (r.resid[1])
                        pymol_cmd += "color %s, resi %r\n" % (color,
                                                               r.resid[1])

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections

        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)

        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        # '-cq' starts PyMOL in batch mode when requested.
        pymol_call = ['pymol', '-cq'] if args.batch else ['pymol']
        p = sp.Popen(pymol_call + pdb_fns + [pml_filename],
                     stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        # Wait for PyMOL to finish while the temporary files still exist.
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
def mend_breakpoints(chains, gap):
    """
    Try to close backbone gaps in the given chains using ModeRNA.

    If ModeRNA cannot be imported, a warning is issued and ``chains`` is
    returned unchanged.

    :param chains: A dictionary mapping chain ids to chain objects.
    :param gap: A list of res_ids, which can be moved to mend the gap.
                Each entry is indexed as a pair ``(g[0], g[1])`` whose
                items have a ``chain`` attribute. Pairs that span two
                different chains are skipped with a warning.
    :returns: A dictionary ``{chain_id: chain}`` with the chains extracted
              from the mended structures, or ``chains`` itself if ModeRNA
              is not installed.
    """
    try:
        import moderna
    except ImportError:
        warnings.warn(
            "Cannot mend gaps in sequence, because ModeRNA is not installed!")
        return chains

    mod_models = {}
    # NOTE(review): tmpdir is currently not used; the code that wrote
    # intermediate files to it is no longer present.
    with fus.make_temp_directory() as tmpdir:
        log.info("Writing chains %s", chains.values())
        for g in gap:
            if g[0].chain != g[1].chain:
                log.warning(
                    "Not mending gap between multiple chains: %s and %s",
                    g[0], g[1])
                continue
            if g[0].chain not in mod_models:
                # Load every chain into ModeRNA only once.
                try:
                    mod_models[g[0].chain] = moderna.load_model(
                        chains[g[0].chain], data_type="chain")
                except Exception as e:
                    with log_to_exception(log, e):
                        log.error("g is %s, g[0] is %s, g[0].chain is %s",
                                  g, g[0], g[0].chain)
                        log.error("chains is %s", chains)
                    raise
            moderna.fix_backbone(mod_models[g[0].chain],
                                 resid_to_moderna(g[0]),
                                 resid_to_moderna(g[1]))

        # Load back to Biopython
        mended_chains = {}
        for chain_id in chains.keys():
            if chain_id in mod_models:
                # Mod models are chain subclasses anyway
                mended_chains[chain_id] = mod_models[chain_id]
                log.info("Mended: %s", mended_chains)
                mended_chains[chain_id].id = chain_id
            else:
                mended_chains[chain_id] = chains[chain_id]
        log.info("mended_chains: %s", mended_chains)

        # Moderna may replace modified residues with "UNK" for unknown or
        # otherwise change the code. We have to replace them back.
        for chain_id in chains:
            for res in mended_chains[chain_id]:
                changed = False
                for o_res in chains[chain_id]:
                    if o_res.id[1:] == res.id[1:]:
                        log.debug("Changing Moderna residue %s to %s",
                                  res, o_res)
                        # Only one residue per number+icode
                        assert not changed
                        res.id = o_res.id
                        res.resname = o_res.resname
                        log.debug("Moderna residue now %s", res)
                        changed = True

        # Convert back from ModeRNA to Biopython
        out_chains = {}
        for k, v in mended_chains.items():
            s = v.get_structure()[0]
            # Routine diagnostic output, so it is not logged as an error.
            log.debug("%s, %s %s", k, s, s.child_dict)
            assert len(s.child_list) == 1
            out_chains[k] = s.child_list[0]
            out_chains[k].id = k
    return out_chains
def main():
    """
    Command-line entry point: display coarse-grained structures in PyMOL.

    The options are parsed from the command line with ``OptionParser``.
    Every positional argument is loaded as a coarse-grained RNA, added to a
    ``cvp.PymolPrinter`` and the resulting script is run in PyMOL. If no
    positional argument is given, the help is printed and the program
    exits with status 1.
    """
    usage = """
    ./visualize_cg.py cg_file

    Display the coarse-grain representation of a structure in pymol.
    """
    num_args = 1
    parser = OptionParser(usage=usage)

    parser.add_option('-g', '--highlight', dest='highlight', default=None,
                      help="Highlight some elements", type='str')
    parser.add_option('-o', '--output', dest='output', default=None,
                      help="Create a picture of the scene and exit",
                      type='str')
    parser.add_option('-r', '--longrange', dest='longrange', default=False,
                      action='store_true',
                      help="Display long-range interactions")
    parser.add_option('-l', '--loops', dest='loops', default=True,
                      action='store_false',
                      help="Don't display the coarse-grain hairpin loops")
    parser.add_option('-c', '--cones', dest='cones', default=False,
                      action='store_true',
                      help="Display cones that portrude from the stems")
    parser.add_option('-x', '--text', dest='text', default=False,
                      action='store_true', help="Add labels to the figure.")
    parser.add_option('-a', '--align', dest='align', default=False,
                      action='store_true',
                      help='Align all of the structures with the first')
    parser.add_option(
        '-e', '--encompassing-stems', dest='encompassing_stems',
        default=False, action='store_true',
        help='Show the big stems that encompass the colinear ones.')
    parser.add_option('-v', '--virtual-atoms', dest='virtual_atoms',
                      default=False, action='store_true',
                      help='Display the virtual atoms')
    parser.add_option(
        '-d', '--distance', dest='distance', default=None,
        help="Draw the lines between specified virtual residues")
    parser.add_option('-t', '--residue-distance', dest='residue_distance',
                      default=None,
                      help="Draw a line between residue distances")
    parser.add_option('-b', '--basis', dest='basis', default=False,
                      action='store_true',
                      help='Display the coordinate basis of each element')
    parser.add_option('', '--stem-color', dest='stem_color',
                      default='green',
                      help='The default color in coarse-grain drawings')
    parser.add_option('', '--multiloop-color', dest='multiloop_color',
                      default='red',
                      help='The default color in coarse-grain drawings')
    parser.add_option('', '--batch', dest='batch', default=False,
                      action='store_true', help='Start pymol in batch mode')
    parser.add_option(
        '', '--sidechain-atoms', dest='sidechain_atoms', default=False,
        action='store_true',
        help='Include the sidechain atoms. Automatically enables --virtual-atoms')
    # Implicit string concatenation instead of a backslash continuation
    # inside the literal, which put stray whitespace into the help text.
    parser.add_option(
        '', '--rainbow', dest='rainbow', default=False, action='store_true',
        help='Color each of the nucleotide positions (i.e. average atoms) '
             'according to the colors of the rainbow and their position')
    parser.add_option('', '--only-elements', dest='only_elements',
                      default=None,
                      help='Display only these elements '
                           'element names should be '
                           'separated by commas')
    # The first fragment needs a trailing space, otherwise the help reads
    # "elementsgradually".
    parser.add_option('', '--color-gradual', dest='color_gradual',
                      default=None,
                      help='Color the specified elements '
                           'gradually from one to the other, example (i1,i4,m1)',
                      type='str')

    (options, args) = parser.parse_args()

    if len(args) < num_args:
        parser.print_help()
        sys.exit(1)

    # Transfer the display options to the printer.
    pp = cvp.PymolPrinter()
    pp.stem_color = options.stem_color
    pp.multiloop_color = options.multiloop_color
    pp.add_loops = options.loops
    pp.draw_cones = options.cones
    pp.add_longrange = options.longrange
    pp.print_text = options.text
    pp.encompassing_stems = options.encompassing_stems
    pp.virtual_atoms = options.virtual_atoms
    pp.sidechain_atoms = options.sidechain_atoms
    pp.basis = options.basis
    pp.rainbow = options.rainbow

    if options.only_elements is not None:
        pp.only_elements = options.only_elements.split(',')

    cgs = [cmg.CoarseGrainRNA(a) for a in args]

    if options.align:
        align_cgs(cgs)

    if options.color_gradual is not None:
        pp.element_specific_colors = dict()
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap('coolwarm')

        # Everything is black, except for the listed elements, which get
        # consecutive colors from the color map.
        for d in cgs[0].defines:
            pp.element_specific_colors[d] = 'black'

        to_color_nodes = options.color_gradual.split(',')
        for i, node in enumerate(to_color_nodes):
            print(node, cmap(i / float(len(to_color_nodes))))
            pp.element_specific_colors[node] = cmap(
                i / float(len(to_color_nodes)))

    for i, cg in enumerate(cgs):
        if i > 0:
            pp.color_modifier = .3
        pp.coordinates_to_pymol(cg)

    # NOTE: From here on, `cg` is the last structure given on the command
    # line (the variable left over from the loop above).

    # highlight things in purple
    if options.highlight is not None:
        for s in options.highlight.split(','):
            fud.pv('s')
            pp.add_twists = False
            pp.add_stem_like(cg, s, color='purple', width=3.)

    # display the distances between nucleotides
    if options.distance is not None:
        virtual_atoms = ftug.virtual_atoms(cg, sidechain=False)

        # Pairs are separated by ':', the two numbers of a pair by ','.
        for dist_pair in options.distance.split(':'):
            fud.pv('dist_pair')
            fr, to = dist_pair.split(',')
            fr = int(fr)
            to = int(to)
            pp.add_dashed(virtual_atoms[fr]["C1'"],
                          virtual_atoms[to]["C1'"], width=1.2)

    if options.residue_distance is not None:
        dist_pair = options.residue_distance
        fr, to = dist_pair.split(',')
        fr = int(fr)
        to = int(to)

        node1 = cg.get_node_from_residue_num(to)
        node2 = cg.get_node_from_residue_num(fr)

        pos1, len1 = cg.get_position_in_element(to)
        pos2, len2 = cg.get_position_in_element(fr)

        vec1 = cg.coords[node1][1] - cg.coords[node1][0]
        vec2 = cg.coords[node2][1] - cg.coords[node2][0]

        # Place the sphere along the element, proportional to the position
        # of the residue within the element.
        mid1 = cg.coords[node1][0] + pos1 * (vec1 / len1)
        mid2 = cg.coords[node2][0] + pos2 * (vec2 / len2)

        pp.add_sphere(mid1, 'green', width=2)
        pp.add_sphere(mid2, 'red', width=2)

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        # The file for running pymol
        pymol_cmd = 'hide all\n'
        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += 'orient\n'

        if options.output is not None:
            # Render the picture and leave PyMOL afterwards.
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (options.output)
            pymol_cmd += 'quit\n'

        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        if options.batch:
            p = sp.Popen(['pymol', '-cq', pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            p = sp.Popen(['pymol', pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)

        # Wait for PyMOL while the temporary files still exist.
        out, err = p.communicate()
def pdb_to_json(text, name, parser=None):
    '''
    Create a graph-layout displaying a pdb file which
    presumably contains some RNA

    The text is written to a temporary file, which is then parsed. Every
    protein chain becomes one molecule with a single uid, every RNA chain
    becomes one molecule with one uid per nucleotide. Chains which are
    neither (e.g. hetatm chains in MMCIF files) are ignored.

    :param text: The text of the pdb file.
    :param name: The name of the pdb file.
    :param parser: The PDB parser to use (Bio.PDB.PDBParser or
                   Bio.PDB.MMCIFParser). NOTE(review): despite the default
                   of None, a parser has to be passed, because
                   ``parser.get_structure`` is called unconditionally.
    :returns: A dictionary with the keys "molecules" (one entry per
              protein or RNA chain) and "extra_links" (the contacts
              between different chains).
    '''
    with fus.make_temp_directory() as output_dir:
        fname = op.join(output_dir, '{}.pdb'.format(name))

        # Dump the pdb text to a temporary file. The file is closed (and
        # thereby flushed) before it is parsed.
        with open(fname, 'w') as f:
            f.write(text)

        struct = parser.get_structure('temp', fname)

        molecules = []
        proteins = set()
        rnas = set()
        cgs = dict()

        for chain in struct.get_chains():
            # create a graph json for each structure in the pdb file
            if ftup.is_protein(chain):
                # sys.stderr.write works with Python 2 and 3, whereas
                # "print >> sys.stderr" raises a TypeError on Python 3.
                sys.stderr.write("protein {}\n".format(chain))
                proteins.add(chain.id)
                # process protein
                molecules.append({
                    "type": "protein",
                    "header": "{}_{}".format(name, chain.id),
                    "seq": "",
                    "ss": "",
                    "size": len(chain.get_list()),
                    "uids": [uuid.uuid4().hex]
                })
            elif ftup.is_rna(chain):
                sys.stderr.write("rna {}\n".format(chain))
                rnas.add(chain.id)
                # process RNA molecules (hopefully)
                # The positions are calculated from the structure without
                # pseudoknots, ...
                cg = ftmc.from_pdb(fname, chain_id=chain.id,
                                   remove_pseudoknots=True, parser=parser)
                positions = fasta_to_positions(cg.to_fasta_string())
                # ... whereas the reported structure keeps them.
                cg = ftmc.from_pdb(fname, chain_id=chain.id,
                                   remove_pseudoknots=False, parser=parser)
                cgs[chain.id] = cg
                molecules.append({
                    "type": "rna",
                    "header": "{}_{}".format(name, chain.id),
                    "seq": cg.seq,
                    "ss": cg.to_dotbracket_string(),
                    "size": cg.seq_length,
                    "uids": [uuid.uuid4().hex
                             for _ in range(cg.seq_length)],
                    "positions": positions
                })
            # else: hetatm type chains which are present in MMCIF files

        # create a lookup table linking the header of the molecule and the
        # 1-based position within the molecule to the uid of that node
        node_ids = dict()
        for m in molecules:
            for i, uid in enumerate(m['uids']):
                node_ids["{}_{}".format(m['header'], i + 1)] = uid

        def node_uid(chain_id, position):
            # Look up the uid of the node at the 1-based position.
            return node_ids["{}_{}_{}".format(name, chain_id, position)]

        links = []
        for (a1, a2) in ftup.interchain_contacts(struct):
            if (a1.parent.id[0] != ' ' or a2.parent.id[0] != ' '):
                # hetatm's will be ignored for now
                continue

            chain1 = a1.parent.parent.id
            chain2 = a2.parent.parent.id

            # Links refer to the uids of the nodes. A protein only has a
            # single node (position 1); for RNAs the node is found via the
            # index of the residue in the seq_ids of the chain.
            if (chain1 in proteins and chain2 in rnas):
                sid = cgs[chain2].seq_ids.index(a2.parent.id)
                links.append({
                    "source": node_uid(chain2, sid + 1),
                    "target": node_uid(chain1, 1),
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in proteins and chain1 in rnas):
                sid = cgs[chain1].seq_ids.index(a1.parent.id)
                links.append({
                    "source": node_uid(chain1, sid + 1),
                    "target": node_uid(chain2, 1),
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in rnas and chain1 in rnas):
                sid1 = cgs[chain1].seq_ids.index(a1.parent.id)
                sid2 = cgs[chain2].seq_ids.index(a2.parent.id)
                links.append({
                    "source": node_uid(chain1, sid1 + 1),
                    "target": node_uid(chain2, sid2 + 1),
                    "link_type": "chain_chain",
                    "value": 3
                })

    return {"molecules": molecules, "extra_links": links}
def main(args):
    """
    Visualize the coarse-grained RNAs given on the command line in PyMOL.

    A PyMOL script with the coarse-grained representation and one mmCIF
    file per RNA with 3D chains are written, then PyMOL is started with a
    generated ``command.pml`` file.

    :param args: The parsed command-line arguments. The attributes
                 ``align``, ``labels``, ``pymol_file``, ``only_elements``,
                 ``output`` and ``batch`` are read here; the whole object
                 is also passed to ``fuc.cgs_from_args`` and
                 ``pymol_printer_from_args``.
    :raises ValueError: If an entry of ``args.labels`` cannot be split
                        into exactly ``element:label``.
    """
    rnas = fuc.cgs_from_args(args, '+', '3d')
    pp = pymol_printer_from_args(args)

    if args.align:
        align_rnas(rnas)

    labels = {}
    if args.labels:
        for label in args.labels.split(","):
            if not label:
                continue
            try:
                elem, lab = label.split(':')
            except ValueError:
                raise ValueError(
                    "Please specify --labels with as list of colon-seperated tuples. Found invalid entry {}."
                    .format(repr(label)))
            labels[elem] = lab
        if not pp.print_text:
            # Only the given labels are shown; all other elements get an
            # empty label.
            labels = defaultdict(lambda: "", labels)
            pp.print_text = True

    color_modifier = 1.0
    log.info("Visualizing %s rnas", len(rnas))
    for rna in rnas:
        pp.add_cg(rna, labels, color_modifier)
        color_modifier *= 0.7

    with make_temp_directory() as tmpdir:
        # The file describing the cg-structure as cylinders
        if args.pymol_file:
            stru_filename = args.pymol_file
        else:
            stru_filename = os.path.join(tmpdir, "structure")
        with open(stru_filename, "w") as f:
            f.write(pp.pymol_string())

        pdb_fns = []
        selection_lines = []
        for i, rna in enumerate(rnas):
            if not rna.chains:
                continue
            obj_name = "pdb{}_{}".format(i, rna.name.replace("-", "_"))
            fn = os.path.join(tmpdir, obj_name + ".cif")
            pdb_fns.append(fn)
            ftup.output_multiple_chains(rna.chains.values(), fn, "cif")
            for d in rna.defines:
                resids = list(
                    rna.define_residue_num_iterator(d, seq_ids=True))
                if not resids:
                    continue
                # Group the residue numbers by chain (chains in order of
                # first appearance), so that every per-chain selection
                # only contains residues of that chain. Previously all
                # residue numbers were listed for every chain.
                chain_ids = []
                numbers_by_chain = {}
                for r in resids:
                    if r.chain not in numbers_by_chain:
                        chain_ids.append(r.chain)
                        numbers_by_chain[r.chain] = []
                    numbers_by_chain[r.chain].append(str(r.resid[1]))
                sel = [
                    "( %{} and chain {} and resi {}) ".format(
                        obj_name, c, "+".join(numbers_by_chain[c]))
                    for c in chain_ids
                ]
                selection_lines.append(
                    "select {}, ".format(d + "_" + obj_name) +
                    " or ".join(sel) + "\n")
        selections = "".join(selection_lines)

        pymol_cmd = 'hide all\n'
        pymol_cmd += 'show cartoon, all\n'
        pymol_cmd += 'set cartoon_ring_mode\n'
        pymol_cmd += 'set cartoon_tube_radius, .3\n'

        if args.only_elements is not None:
            pymol_cmd += "hide all\n"
            for constraint in args.only_elements.split(','):
                color = pp.get_element_color(constraint)
                # `cg` was never defined in this function, so the loaded
                # RNAs are used instead. Structures which do not contain
                # the element are skipped.
                for rna in rnas:
                    if constraint not in rna.defines:
                        continue
                    for r in rna.define_residue_num_iterator(
                            constraint, seq_ids=True):
                        # The residue number is r.resid[1], like in the
                        # selections above.
                        pymol_cmd += "show sticks, resi %r\n" % (r.resid[1])
                        pymol_cmd += "color %s, resi %r\n" % (color,
                                                               r.resid[1])

        pymol_cmd += 'run %s\n' % (stru_filename)
        pymol_cmd += 'bg white\n'
        pymol_cmd += 'clip slab, 10000\n'
        pymol_cmd += selections

        if args.output is not None:
            pymol_cmd += 'ray\n'
            pymol_cmd += 'png %s\n' % (args.output)

        pml_filename = os.path.join(tmpdir, "command.pml")
        with open(pml_filename, "w") as f1:
            f1.write(pymol_cmd)

        if args.batch:
            p = sp.Popen(['pymol', '-cq'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        else:
            p = sp.Popen(['pymol'] + pdb_fns + [pml_filename],
                         stdout=sp.PIPE, stderr=sp.PIPE)
        log.info("Now opening pymol")
        # The temporary directory must exist until PyMOL has terminated.
        out, err = p.communicate()
        log.info("Out=\n%s", out)
        log.info("Errt=\n%s", err)
def pdb_to_json(text, name):
    '''
    Create a graph-layout displaying a pdb file which
    presumably contains some RNA

    The text is written to a temporary file and parsed with
    ``bpdb.PDBParser``. Every protein chain is represented by a json with
    a single node; every other chain is treated as RNA and converted with
    ``bg_to_json``.

    :param text: The contents of the pdb file.
    :param name: The name of the structure. It is used for the temporary
                 file and for the "struct_name" of the protein nodes.
    :returns: A dictionary with the keys "jsons" (one entry per chain plus
              a final entry without nodes holding the inter-chain links)
              and "extra_links" (the same list of inter-chain links).
    '''
    with fus.make_temp_directory() as output_dir:
        fname = op.join(output_dir, '{}.pdb'.format(name))

        # Dump the pdb text to a temporary file. Leaving the with-block
        # closes and flushes the file before it is parsed. The former
        # "f.flush" (without parentheses) never flushed anything.
        with open(fname, 'w') as f:
            f.write(text)

        struct = bpdb.PDBParser().get_structure('temp', fname)

        jsons = []
        proteins = set()
        rnas = set()
        cgs = dict()

        for chain in struct.get_chains():
            # create a graph json for each structure in the pdb file
            if ftup.is_protein(chain):
                proteins.add(chain.id)
                # process protein
                jsons.append({
                    "nodes": [{
                        "group": 2,
                        "struct_name": "{}_{}".format(name, chain.id),
                        "id": 1,
                        "size": len(chain.get_list()),
                        "name": chain.id,
                        "node_type": "protein"
                    }],
                    "links": []
                })
            else:
                rnas.add(chain.id)
                # process RNA molecules (hopefully)
                cg = ftmc.from_pdb(fname, chain_id=chain.id)
                cgs[chain.id] = cg
                jsons.append(bg_to_json(cg))

        # create a lookup table to find out the index of each node in
        # what will eventually become the large list of nodes
        counter = 0
        node_ids = dict()
        for j in jsons:
            for n in j['nodes']:
                node_ids["{}_{}".format(n['struct_name'], n['id'])] = counter
                counter += 1

        def relative_index(chain_id, node_id):
            # The source and target values need to be reduced by the length
            # of the nodes array, because when the jsons are added to the
            # graph, the link source and target are incremented so as to
            # correspond to the new indices of the nodes. So a link to a
            # node at position 10, if there are 50 nodes, will have to
            # have a value of -40.
            key = "{}_{}_{}".format(name, chain_id, node_id)
            return node_ids[key] - counter

        links = []
        for (a1, a2) in ftup.interchain_contacts(struct):
            if (a1.parent.id[0] != ' ' or a2.parent.id[0] != ' '):
                # hetatm's will be ignored for now
                continue

            chain1 = a1.parent.parent.id
            chain2 = a2.parent.parent.id

            if (chain1 in proteins and chain2 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain2].seq_ids.index(a2.parent.id)
                links.append({
                    "source": relative_index(chain2, sid + 1),
                    "target": relative_index(chain1, 1),
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in proteins and chain1 in rnas):
                # get the index of this nucleotide in the secondary structure
                sid = cgs[chain1].seq_ids.index(a1.parent.id)
                links.append({
                    "source": relative_index(chain1, sid + 1),
                    "target": relative_index(chain2, 1),
                    "link_type": "protein_chain",
                    "value": 3
                })
            elif (chain2 in rnas and chain1 in rnas):
                # get the index of these nucleotides in the secondary
                # structure
                sid1 = cgs[chain1].seq_ids.index(a1.parent.id)
                sid2 = cgs[chain2].seq_ids.index(a2.parent.id)
                links.append({
                    "source": relative_index(chain1, sid1 + 1),
                    "target": relative_index(chain2, sid2 + 1),
                    "link_type": "chain_chain",
                    "value": 3
                })

        # The inter-chain links are appended as a json without nodes.
        jsons.append({"nodes": [], "links": links})

    return {"jsons": jsons, "extra_links": links}