def import_3Ds(db_host='localhost', db_port=27017, rna3dhub=False, canonical_only=True, annotate=False, limit=5000):
    """Fetch RNA-only PDB entries and store their structures in MongoDB.

    Connects to the MongoDB server at ``db_host:db_port`` and selects the
    "RNA3DHub" database when ``rna3dhub`` is true, "PDB" otherwise.

    When ``rna3dhub`` is false, the PDB is queried with a chain-type query
    (RNA: yes; protein, DNA, hybrid: no). Every returned id that has no
    document with source "db:pdb:<id>" in the 'tertiaryStructures'
    collection is downloaded, parsed, and each tertiary structure is handed
    to ``save``.

    Parameters:
        db_host, db_port: location of the MongoDB server.
        rna3dhub: selects the target database (see above).
        canonical_only: forwarded to ``rnaview.annotate``.
        annotate: when true, each structure is annotated with RNAVIEW and the
            resulting secondary structure is saved along with it.
        limit: forwarded unchanged to ``save``.

    NOTE(review): this block contains no import path for ``rna3dhub=True``;
    in that case only the database handle is selected.
    """
    client = MongoClient(db_host, db_port)
    db = client["RNA3DHub" if rna3dhub else "PDB"]
    rnaview = Rnaview()

    if not rna3dhub:
        pdb = PDB()
        # The query text is sent verbatim. Its <description> mentions a
        # protein chain although <containsProtein> is N; left untouched.
        query = (
            "<orgPdbQuery> "
            "<version>head</version> "
            "<queryType>org.pdb.query.simple.ChainTypeQuery</queryType> "
            "<description>Chain Type: there is a Protein and a RNA chain but not any DNA or Hybrid</description> "
            "<containsProtein>N</containsProtein> "
            "<containsDna>N</containsDna> "
            "<containsRna>Y</containsRna> "
            "<containsHybrid>N</containsHybrid> "
            "</orgPdbQuery>"
        )
        pdb_ids = pdb.query(query)
        print("%i 3Ds to process" % len(pdb_ids))

        for pdb_id in pdb_ids:
            # Skip entries that were already imported.
            if db['tertiaryStructures'].find_one({'source': "db:pdb:%s" % pdb_id}):
                continue
            print("Recover %s" % pdb_id)
            for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only=canonical_only)
                    save(db, ss, ts, pdb_id, limit)
                except Exception as e:
                    # Best effort: report the failure and still store the
                    # 3D structure without any secondary structure.
                    print(e)
                    print("No annotation for %s" % pdb_id)
                    save(db, None, ts, pdb_id, limit)
def test():
    """Demo run on PDB entry 1EHZ: RNAVIEW annotation, then RNAfold prediction.

    Downloads and parses entry 1EHZ, prints the base pairs RNAVIEW derives
    from each tertiary structure, then prints each RNA sequence together
    with the output of ``Rnafold().fold``.
    """
    print("Recovering entry 1EHZ from Protein Databank...\n")
    pdb = PDB()
    # Materialized once because the structures are walked twice below; a
    # one-shot iterable would leave the second loop empty.
    tertiary_structures = list(parse_pdb(pdb.get_entry('1EHZ')))

    print("## 3D annotation ##\n")
    print("List of base-pairs computed with RNAVIEW:\n")
    for ts in tertiary_structures:
        # annotate() returns a (secondary, tertiary) pair; only the
        # secondary structure is needed here.
        secondary_structure, _ = Rnaview().annotate(ts)
        print(secondary_structure_to_base_pairs(secondary_structure, keep_tertiaries=True))

    print("\n## 2D prediction ##\n")
    for ts in tertiary_structures:
        print("RNA sequence from 1EHZ:\n")
        print(ts.rna.sequence)
        print("\nList of base-pairs computed with RNAfold (RNA Vienna Package):\n")
        print(Rnafold().fold(molecule=ts.rna))
save(db, ss, ts, pdb_id, limit) except Exception, e: print e print "No annotation for %s"%pdb_id else: pdb = PDB() rna3dHub = RNA3DHub() clusters = rna3dHub.get_clusters() print "%i 3Ds to process"%len(clusters) for cluster in clusters['pdb-ids']: if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%cluster[0]}): continue print "Recover %s"%cluster[0] #we use the first pdb_id in the list of ids making a cluster for ts in parsers.parse_pdb(pdb.get_entry(cluster[0])): try: ss = None if annotate: ss, ts = rnaview.annotate(ts, canonical_only = canonical_only) save(db, ss, ts, cluster[0], limit) except Exception, e: print e print "No annotation for %s"%cluster[0] def save(db, secondary_structure, tertiary_structure, pdbId, limit): if db['junctions'].count() >= limit: print "Limit of %i junctions reached"%limit sys.exit()
except Exception, e: print e print "No annotation for %s"%pdb_id save(db, None, ts, pdb_id, limit) else: pdb = PDB() rna3dHub = RNA3DHub() clusters = rna3dHub.get_clusters() print "%i 3Ds to process"%len(clusters) for cluster in clusters['pdb-ids']: pdb_id = cluster[0].split('|')[0] if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%pdb_id}): continue print "Recover %s"%pdb_id #we use the first pdb_id in the list of ids making a cluster for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)): try: ss = None if annotate: ss, ts = rnaview.annotate(ts, canonical_only = canonical_only) save(db, ss, ts, pdb_id, limit) except Exception, e: print e print "No annotation for %s"%pdb_id save(db, None, ts, pdb_id, limit) def save(db, secondary_structure, tertiary_structure, pdbId, limit): if db['junctions'].count() >= limit: print "Limit of %i junctions reached"%limit sys.exit()
def post(self):
    """Run the requested tool on the posted data and write the result.

    Request arguments read here: ``data``, ``tool``, ``version`` (default 1),
    ``pdbid`` and, for ``tool == 'rnasubopt'``, ``random_sample`` (default 20).

    Dispatch:
      * ``data`` starts with '>' (FASTA):
          - one molecule: 'rnafold' / 'contrafold' write a single JSON
            secondary structure; any other tool writes a JSON list
            ('rnasubopt' fills it with one entry per sampled structure).
          - two or more molecules and tool 'mlocarna': writes the alignment
            in CLUSTALW form.
      * tool 'rnalifold' and ``data`` starts with 'CLUSTAL': writes the
        output of ``RnaAlifold().align``.
      * tool 'rnaview': annotates the PDB entry ``pdbid`` (or the PDB text in
        ``data``) and writes a JSON list of {"2D": ..., "3D": ...} objects.

    A document describing the call is inserted into ``logs_db['webservices']``
    with status 'running' and set to 'done' on the paths that write a result.
    """
    data = self.get_argument('data', default=None)
    tool = self.get_argument('tool', default=None)
    version = self.get_argument('version', default=1)
    pdbid = self.get_argument('pdbid', default=None)
    # A supplied 'version' arrives as text, so "1" would never equal the
    # integer 1 used below. Normalize it; a non-numeric value is kept as-is
    # and therefore selects the non-v1 format, as before.
    try:
        version = int(version)
    except (TypeError, ValueError):
        pass
    output = None
    result = None

    log = {
        'path': self.request.uri,
        'tool': tool,
        'ip': self.request.remote_ip,
        'method': self.request.method,
        'date': datetime.datetime.now(),
        'status': 'running'
    }
    # log['_id'] is read back below, so insert() is relied on to set it.
    logs_db['webservices'].insert(log)

    def _mark_done():
        # Flag the log entry created above as finished.
        logs_db['webservices'].update(
            {'_id': log['_id']},
            {'$set': {'status': 'done', 'date': datetime.datetime.now()}})

    def _location(location):
        # Version 1 wraps every location in an {'ends': ...} object.
        return {'ends': location} if version == 1 else location

    def _describe_interaction(interaction):
        # Description of one base-base interaction.
        return {
            'orientation': interaction['orientation'],
            'edge1': interaction['edge1'],
            'edge2': interaction['edge2'],
            'location': _location(interaction['location'])
        }

    def _describe_header(structure):
        # Identity fields shared by 2D and 3D structure descriptions.
        return {
            '_id': structure._id,
            'name': structure.name,
            'source': structure.source,
            'rna': {
                'name': structure.rna.name,
                'sequence': structure.rna.sequence,
                'source': structure.rna.source,
                '_id': structure.rna._id
            }
        }

    def _describe_2d(ss):
        # Header plus helices, single strands and tertiary interactions.
        descr = _describe_header(ss)
        helices_descr = []
        for helix in ss.helices:
            helix_descr = {
                'name': helix['name'],
                'location': _location(helix['location'])
            }
            if 'interactions' in helix:
                helix_descr['interactions'] = [
                    _describe_interaction(interaction)
                    for interaction in helix['interactions']
                ]
            helices_descr.append(helix_descr)
        descr['helices'] = helices_descr
        descr['singleStrands'] = [
            {
                'name': single_strand['name'],
                'location': _location(single_strand['location'])
            }
            for single_strand in ss.single_strands
        ]
        descr['tertiaryInteractions'] = [
            _describe_interaction(interaction)
            for interaction in ss.tertiary_interactions
        ]
        return descr

    if data and data.startswith('>'):  # 2D prediction
        rnas = parse_fasta(data)
        result = []
        if len(rnas) == 1:  # single molecule prediction (MFE,...)
            rna = rnas[0]
            secondary_structures = []
            if tool == 'rnafold':
                secondary_structures.append(
                    base_pairs_to_secondary_structure(rna, Rnafold().fold(rna)))
            elif tool == 'contrafold':
                secondary_structures.append(
                    base_pairs_to_secondary_structure(rna, Contrafold().fold(rna)))
            elif tool == 'rnasubopt':
                random_sample = int(self.get_argument('random_sample', default=20))
                for base_pairs in Rnasubopt().fold(rna, random_sample=random_sample):
                    secondary_structures.append(
                        base_pairs_to_secondary_structure(rna, base_pairs))

            for ss in secondary_structures:
                result.append(_describe_2d(ss))

            _mark_done()
            if tool in ('rnafold', 'contrafold'):
                # These tools yield exactly one structure: write it bare.
                self.write(json_encode(result[0]))
            else:
                self.write(json_encode(result))
        elif len(rnas) >= 2:  # structural alignment
            if tool == 'mlocarna':
                aligned_molecules, consensus2D = Mlocarna().align(rnas)
                _mark_done()
                self.write(to_clustalw(consensus2D, aligned_molecules))
    elif tool == 'rnalifold' and data and data.startswith('CLUSTAL'):
        # computation of consensus structure from sequence alignment
        _mark_done()
        self.write(RnaAlifold().align(data))
    elif tool == 'rnaview':  # 3D annotation
        from pyrna.db import PDB
        rnaview = Rnaview()
        # NOTE(review): `output` is hard-coded to None above, so this branch
        # is never taken as written; presumably it was meant to come from a
        # request argument - confirm before wiring it up.
        if output == 'rnaml':
            if pdbid:
                self.write(rnaview.annotate(pdb_content=PDB().get_entry(pdbid), raw_output=True))
            elif data:
                self.write(rnaview.annotate(pdb_content=data, raw_output=True))
        else:
            # Start empty so a request carrying neither 'pdbid' nor 'data'
            # yields an empty list instead of an unbound-name error.
            tertiary_structures = []
            if pdbid:
                tertiary_structures = parse_pdb(PDB().get_entry(pdbid))
            elif data:
                tertiary_structures = parse_pdb(data)

            result = []
            for ts in tertiary_structures:
                (ss, ts) = rnaview.annotate(ts, canonical_only=False)
                ss.find_junctions()

                _2D_descr = _describe_2d(ss)
                _2D_descr['junctions'] = [
                    {
                        'description': junction['description'],
                        'location': junction['location']
                    }
                    for junction in ss.junctions
                ]

                _3D_descr = _describe_header(ts)
                residues_descr = {}
                for key in sorted(ts.residues):  # the absolute positions are sorted
                    residues_descr[str(key)] = {
                        'atoms': [
                            {'name': atom['name'], 'coords': atom['coords']}
                            for atom in ts.residues[key]['atoms']
                        ]
                    }
                _3D_descr['residues'] = residues_descr

                result.append({"2D": _2D_descr, "3D": _3D_descr})

            _mark_done()
            self.write(json_encode(result))
from pyrna.computations import Cmalign, Rnaview
from bson.objectid import ObjectId
import os

# For every Rfam family that has a solved structure: find the PDB chain named
# in the row, annotate it with RNAVIEW, align it against the family with
# cmalign, and create the family's output directories.
pdb = PDB()
cmalign = Cmalign()
rnaview = Rnaview()
rfam = Rfam(cache_dir="/home/fjossinet/tmp/Rfam")
families_with_structures = rfam.get_families_with_structures()

for index, row in families_with_structures.iterrows():
    rfam_id = row['rfam_id']
    tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id']))

    # Locate the chain whose RNA name matches the one listed for the family.
    reference_rna = None
    ts = None
    for tertiary_structure in tertiary_structures:
        if tertiary_structure.rna.name == row['chain_name']:
            ts = tertiary_structure
            reference_rna = ts.rna
            break

    if ts:
        secondary_structure, tertiary_structure = rnaview.annotate(tertiary_structure=ts)
        rnas, orgs, consensus_2d = cmalign.align(
            [reference_rna],
            rfam_id=rfam_id,
            rfam=rfam,
        )
        # NOTE(review): os.mkdir raises OSError when the directory already
        # exists (a re-run, or a second row with the same rfam_id).
        os.mkdir("/home/fjossinet/tmp/%s" % rfam_id)
        os.mkdir("/home/fjossinet/tmp/%s/Molecules" % rfam_id)
from pyrna.computations import Cmalign, Rnaview from bson.objectid import ObjectId import os pdb = PDB() cmalign = Cmalign() rnaview = Rnaview() rfam = Rfam(cache_dir="/home/fjossinet/tmp/Rfam") families_with_structures = rfam.get_families_with_structures() for index, row in families_with_structures.iterrows(): rfam_id = row['rfam_id'] tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id'])) reference_rna = None ts = None for tertiary_structure in tertiary_structures: if tertiary_structure.rna.name == row['chain_name']: ts = tertiary_structure reference_rna = ts.rna break if ts: secondary_structure, tertiary_structure = rnaview.annotate( tertiary_structure=ts) rnas, orgs, consensus_2d = cmalign.align([reference_rna], rfam_id=rfam_id,
def post(self):
    """Run the requested tool on the posted data and write the result.

    Request arguments read here: ``data``, ``tool``, ``version`` (default 1),
    ``pdbid`` and, for ``tool == 'rnasubopt'``, ``random_sample`` (default 20).

    Dispatch:
      * ``data`` starts with '>' (FASTA):
          - one molecule: 'rnafold' / 'contrafold' write a single JSON
            secondary structure; any other tool writes a JSON list
            ('rnasubopt' fills it with one entry per sampled structure).
          - two or more molecules and tool 'mlocarna': writes the alignment
            in CLUSTALW form.
      * tool 'rnalifold' and ``data`` starts with 'CLUSTAL': writes the
        output of ``RnaAlifold().align``.
      * tool 'rnaview': annotates the PDB entry ``pdbid`` (or the PDB text in
        ``data``) and writes a JSON list of {"2D": ..., "3D": ...} objects.

    A document describing the call is inserted into ``logs_db['webservices']``
    with status 'running' and set to 'done' on the paths that write a result.
    """
    data = self.get_argument('data', default=None)
    tool = self.get_argument('tool', default=None)
    version = self.get_argument('version', default=1)
    pdbid = self.get_argument('pdbid', default=None)
    # A supplied 'version' arrives as text, so "1" would never equal the
    # integer 1 used below. Normalize it; a non-numeric value is kept as-is
    # and therefore selects the non-v1 format, as before.
    try:
        version = int(version)
    except (TypeError, ValueError):
        pass
    output = None
    result = None

    log = {
        'path': self.request.uri,
        'tool': tool,
        'ip': self.request.remote_ip,
        'method': self.request.method,
        'date': datetime.datetime.now(),
        'status': 'running'
    }
    # log['_id'] is read back below, so insert() is relied on to set it.
    logs_db['webservices'].insert(log)

    def _mark_done():
        # Flag the log entry created above as finished.
        logs_db['webservices'].update(
            {'_id': log['_id']},
            {'$set': {'status': 'done', 'date': datetime.datetime.now()}})

    def _location(location):
        # Version 1 wraps every location in an {'ends': ...} object.
        return {'ends': location} if version == 1 else location

    def _describe_interaction(interaction):
        # Description of one base-base interaction.
        return {
            'orientation': interaction['orientation'],
            'edge1': interaction['edge1'],
            'edge2': interaction['edge2'],
            'location': _location(interaction['location'])
        }

    def _describe_header(structure):
        # Identity fields shared by 2D and 3D structure descriptions.
        return {
            '_id': structure._id,
            'name': structure.name,
            'source': structure.source,
            'rna': {
                'name': structure.rna.name,
                'sequence': structure.rna.sequence,
                'source': structure.rna.source,
                '_id': structure.rna._id
            }
        }

    def _describe_2d(ss):
        # Header plus helices, single strands and tertiary interactions.
        descr = _describe_header(ss)
        helices_descr = []
        for helix in ss.helices:
            helix_descr = {
                'name': helix['name'],
                'location': _location(helix['location'])
            }
            if 'interactions' in helix:
                helix_descr['interactions'] = [
                    _describe_interaction(interaction)
                    for interaction in helix['interactions']
                ]
            helices_descr.append(helix_descr)
        descr['helices'] = helices_descr
        descr['singleStrands'] = [
            {
                'name': single_strand['name'],
                'location': _location(single_strand['location'])
            }
            for single_strand in ss.single_strands
        ]
        descr['tertiaryInteractions'] = [
            _describe_interaction(interaction)
            for interaction in ss.tertiary_interactions
        ]
        return descr

    if data and data.startswith('>'):  # 2D prediction
        rnas = parse_fasta(data)
        result = []
        if len(rnas) == 1:  # single molecule prediction (MFE,...)
            rna = rnas[0]
            secondary_structures = []
            if tool == 'rnafold':
                secondary_structures.append(
                    base_pairs_to_secondary_structure(rna, Rnafold().fold(rna)))
            elif tool == 'contrafold':
                secondary_structures.append(
                    base_pairs_to_secondary_structure(rna, Contrafold().fold(rna)))
            elif tool == 'rnasubopt':
                random_sample = int(self.get_argument('random_sample', default=20))
                for base_pairs in Rnasubopt().fold(rna, random_sample=random_sample):
                    secondary_structures.append(
                        base_pairs_to_secondary_structure(rna, base_pairs))

            for ss in secondary_structures:
                result.append(_describe_2d(ss))

            _mark_done()
            if tool in ('rnafold', 'contrafold'):
                # These tools yield exactly one structure: write it bare.
                self.write(json_encode(result[0]))
            else:
                self.write(json_encode(result))
        elif len(rnas) >= 2:  # structural alignment
            if tool == 'mlocarna':
                aligned_molecules, consensus2D = Mlocarna().align(rnas)
                _mark_done()
                self.write(to_clustalw(consensus2D, aligned_molecules))
    elif tool == 'rnalifold' and data and data.startswith('CLUSTAL'):
        # computation of consensus structure from sequence alignment
        _mark_done()
        self.write(RnaAlifold().align(data))
    elif tool == 'rnaview':  # 3D annotation
        from pyrna.db import PDB
        rnaview = Rnaview()
        # NOTE(review): `output` is hard-coded to None above, so this branch
        # is never taken as written; presumably it was meant to come from a
        # request argument - confirm before wiring it up.
        if output == 'rnaml':
            if pdbid:
                self.write(rnaview.annotate(pdb_content=PDB().get_entry(pdbid), raw_output=True))
            elif data:
                self.write(rnaview.annotate(pdb_content=data, raw_output=True))
        else:
            # Start empty so a request carrying neither 'pdbid' nor 'data'
            # yields an empty list instead of an unbound-name error.
            tertiary_structures = []
            if pdbid:
                tertiary_structures = parse_pdb(PDB().get_entry(pdbid))
            elif data:
                tertiary_structures = parse_pdb(data)

            result = []
            for ts in tertiary_structures:
                (ss, ts) = rnaview.annotate(ts, canonical_only=False)
                ss.find_junctions()

                _2D_descr = _describe_2d(ss)
                _2D_descr['junctions'] = [
                    {
                        'description': junction['description'],
                        'location': junction['location']
                    }
                    for junction in ss.junctions
                ]

                _3D_descr = _describe_header(ts)
                residues_descr = {}
                for key in sorted(ts.residues):  # the absolute positions are sorted
                    residues_descr[str(key)] = {
                        'atoms': [
                            {'name': atom['name'], 'coords': atom['coords']}
                            for atom in ts.residues[key]['atoms']
                        ]
                    }
                _3D_descr['residues'] = residues_descr

                result.append({"2D": _2D_descr, "3D": _3D_descr})

            _mark_done()
            self.write(json_encode(result))