Beispiel #1
0
def import_3Ds(db_host = 'localhost', db_port = 27017, rna3dhub = False, canonical_only = True, annotate = False, limit = 5000):
    """Download RNA entries from the PDB and hand each parsed 3D to save().

    A MongoDB client is opened on db_host:db_port and the database named
    "RNA3DHub" (rna3dhub true) or "PDB" (rna3dhub false) is selected. In the
    PDB case the ids returned by the chain-type query below are walked; ids
    already present in the 'tertiaryStructures' collection (matched on the
    'source' field "db:pdb:<id>") are skipped, the others are fetched, parsed
    with parsers.parse_pdb() and passed to save().

    NOTE(review): when rna3dhub is true this version only opens the database
    and returns -- no RNA3DHub import loop is present here. Confirm whether
    that branch is missing.

    Parameters:
        db_host, db_port: location of the MongoDB server.
        rna3dhub: selects the target database name (see above).
        canonical_only: forwarded to rnaview.annotate().
        annotate: when true, each 3D is annotated with rnaview.annotate()
            before being saved; otherwise it is saved without a 2D.
        limit: forwarded unchanged to save().
    """
    client = MongoClient(db_host, db_port)
    db = client["RNA3DHub" if rna3dhub else "PDB"]
    rnaview = Rnaview()

    if rna3dhub:
        return

    pdb = PDB()
    # The flags select entries with RNA chains and without protein, DNA or
    # hybrid chains. The <description> text says "a Protein and a RNA chain";
    # it is part of the payload sent to the service and is left untouched.
    rna_only_query ="""<orgPdbQuery>
    <version>head</version>
    <queryType>org.pdb.query.simple.ChainTypeQuery</queryType>
    <description>Chain Type: there is a Protein and a RNA chain but not any DNA or Hybrid</description>
    <containsProtein>N</containsProtein>
    <containsDna>N</containsDna>
    <containsRna>Y</containsRna>
    <containsHybrid>N</containsHybrid>
  </orgPdbQuery>"""
    pdb_ids = pdb.query(rna_only_query)
    print("%i 3Ds to process" % len(pdb_ids))

    for pdb_id in pdb_ids:
        # Entries already imported are recognised by their 'source' field.
        if db['tertiaryStructures'].find_one({'source': "db:pdb:%s" % pdb_id}):
            continue
        print("Recover %s" % pdb_id)
        for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)):
            ss = None
            try:
                if annotate:
                    ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                save(db, ss, ts, pdb_id, limit)
            except Exception as e:
                # Anything going wrong above falls back to storing the 3D
                # without a secondary structure.
                print(e)
                print("No annotation for %s" % pdb_id)
                save(db, None, ts, pdb_id, limit)
def test():
    """Demo run on PDB entry 1EHZ.

    Fetches the entry, prints the base pairs RNAVIEW computes for every parsed
    3D structure, then prints each RNA sequence together with the output of
    Rnafold().fold() for that molecule.
    """
    print("Recovering entry 1EHZ from Protein Databank...\n")
    pdb = PDB()
    # The structures are walked twice below. parse_pdb's return type is not
    # visible here; materializing it keeps the second loop from seeing an
    # exhausted iterator if it turns out to be a one-shot iterable.
    tertiary_structures = list(parse_pdb(pdb.get_entry('1EHZ')))

    print("## 3D annotation ##\n")

    print("List of base-pairs computed with RNAVIEW:\n")

    for ts in tertiary_structures:
        secondary_structure, tertiary_structure = Rnaview().annotate(ts)
        print(secondary_structure_to_base_pairs(secondary_structure, keep_tertiaries = True))

    print("\n## 2D prediction ##\n")

    for ts in tertiary_structures:
        print("RNA sequence from 1EHZ:\n")
        print(ts.rna.sequence)
        print("\nList of base-pairs computed with RNAfold (RNA Vienna Package):\n")
        print(Rnafold().fold(molecule=ts.rna))
                    save(db, ss, ts, pdb_id, limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
    else:
        pdb = PDB()
        rna3dHub = RNA3DHub()
        clusters = rna3dHub.get_clusters()
        print "%i 3Ds to process"%len(clusters)

        for cluster in clusters['pdb-ids']:
            if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%cluster[0]}):
                continue
            print "Recover %s"%cluster[0] #we use the first pdb_id in the list of ids making a cluster
            for ts in parsers.parse_pdb(pdb.get_entry(cluster[0])):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                    save(db, ss, ts, cluster[0], limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%cluster[0]

def save(db, secondary_structure, tertiary_structure, pdbId, limit):
    """Stop the process once the 'junctions' collection holds `limit` documents.

    When db['junctions'].count() has reached `limit`, a message is printed and
    sys.exit() is called; otherwise the function returns None.

    NOTE(review): secondary_structure, tertiary_structure and pdbId are not
    used by the body visible here; the code that stores them presumably
    belongs after this check -- confirm against the full source.
    """
    if db['junctions'].count() >= limit:
        print("Limit of %i junctions reached" % limit)
        sys.exit()
Beispiel #4
0
                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
                    save(db, None, ts, pdb_id, limit)
    else:
        pdb = PDB()
        rna3dHub = RNA3DHub()
        clusters = rna3dHub.get_clusters()
        print "%i 3Ds to process"%len(clusters)

        for cluster in clusters['pdb-ids']:
            pdb_id = cluster[0].split('|')[0]
            if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%pdb_id}):
                continue
            print "Recover %s"%pdb_id #we use the first pdb_id in the list of ids making a cluster
            for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                    save(db, ss, ts, pdb_id, limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
                    save(db, None, ts, pdb_id, limit)

def save(db, secondary_structure, tertiary_structure, pdbId, limit):
    """Stop the process once the 'junctions' collection holds `limit` documents.

    When db['junctions'].count() has reached `limit`, a message is printed and
    sys.exit() is called; otherwise the function returns None.

    NOTE(review): secondary_structure, tertiary_structure and pdbId are not
    used by the body visible here; the code that stores them presumably
    belongs after this check -- confirm against the full source.
    """
    if db['junctions'].count() >= limit:
        print("Limit of %i junctions reached" % limit)
        sys.exit()
Beispiel #5
0
    def post(self):
        """Run the requested RNA tool on the posted input and write its result.

        Request arguments read: 'data', 'tool', 'version', 'pdbid' and, for
        tool 'rnasubopt', 'random_sample' (default 20).

        Dispatch:
          * data starting with '>' is parsed with parse_fasta():
              - one molecule: tools 'rnafold', 'contrafold' and 'rnasubopt'
                fold it; the JSON description of the single structure
                (rnafold, contrafold) or the JSON list of descriptions (any
                other tool, possibly empty) is written;
              - two or more molecules with tool 'mlocarna': the alignment is
                written through to_clustalw().
          * tool 'rnalifold' with data starting with 'CLUSTAL': the output of
            RnaAlifold().align(data) is written.
          * tool 'rnaview': the structures parsed from PDB entry `pdbid`, or
            from `data`, are annotated and written as a JSON list of
            {"2D": ..., "3D": ...} descriptions. With neither input the list
            is empty.

        In the version-1 layout every location is wrapped as
        {'ends': location}; any other version emits the location as is.

        A log document with status 'running' is inserted into
        logs_db['webservices'] on entry and set to 'done' right before a
        result is produced; requests matching no branch stay 'running'.
        """
        data = self.get_argument('data', default=None)
        tool = self.get_argument('tool', default=None)
        version = self.get_argument('version', default=1)
        pdbid = self.get_argument('pdbid', default=None)
        # NOTE(review): output is never read from the request, so the 'rnaml'
        # branch below cannot run as written. Presumably it was meant to come
        # from an 'output' argument -- confirm before wiring it up.
        output = None
        # Only the default is the int 1. A version supplied in the request is
        # presumably a string (TODO confirm against the handler framework), so
        # both spellings select the version-1 layout.
        legacy_layout = version in (1, '1')

        log = {
            'path': self.request.uri,
            'tool': tool,
            'ip': self.request.remote_ip,
            'method': self.request.method,
            'date': datetime.datetime.now(),
            'status': 'running'
        }

        # _log_done() later relies on log['_id'] being present after this call.
        logs_db['webservices'].insert(log)

        if data and data.startswith('>'):  # 2D prediction
            rnas = parse_fasta(data)
            if len(rnas) == 1:  # single molecule prediction (MFE,...)
                rna = rnas[0]
                secondary_structures = []
                if tool == 'rnafold':
                    secondary_structures.append(
                        base_pairs_to_secondary_structure(
                            rna, Rnafold().fold(rna)))
                elif tool == 'contrafold':
                    secondary_structures.append(
                        base_pairs_to_secondary_structure(
                            rna, Contrafold().fold(rna)))
                elif tool == 'rnasubopt':
                    random_sample = int(
                        self.get_argument('random_sample', default=20))
                    for base_pairs in Rnasubopt().fold(
                            rna, random_sample=random_sample):
                        secondary_structures.append(
                            base_pairs_to_secondary_structure(
                                rna, base_pairs))

                result = [
                    self._describe_2d(ss, legacy_layout)
                    for ss in secondary_structures
                ]
                self._log_done(log)
                if tool == 'rnafold' or tool == 'contrafold':
                    # These tools yield exactly one structure: unwrap it.
                    self.write(json_encode(result[0]))
                else:
                    self.write(json_encode(result))
            elif len(rnas) >= 2:  # structural alignment
                if tool == 'mlocarna':
                    aligned_molecules, consensus2D = Mlocarna().align(rnas)
                    self._log_done(log)
                    self.write(to_clustalw(consensus2D, aligned_molecules))
        elif tool == 'rnalifold' and data and data.startswith(
                'CLUSTAL'
        ):  # computation of consensus structure from sequence alignment
            self._log_done(log)
            self.write(RnaAlifold().align(data))
        elif tool == 'rnaview':  # 3D annotation
            from pyrna.db import PDB
            rnaview = Rnaview()

            if output == 'rnaml':
                if pdbid:
                    self.write(
                        rnaview.annotate(pdb_content=PDB().get_entry(pdbid),
                                         raw_output=True))
                elif data:
                    self.write(
                        rnaview.annotate(pdb_content=data, raw_output=True))

            else:
                # Start from an empty list so that a request carrying neither
                # pdbid nor data answers with [] (as the 2D branch does for an
                # unknown tool) instead of failing on an unbound name.
                tertiary_structures = []
                if pdbid:
                    tertiary_structures = parse_pdb(PDB().get_entry(pdbid))
                elif data:
                    tertiary_structures = parse_pdb(data)

                result = []

                for ts in tertiary_structures:
                    (ss, ts) = rnaview.annotate(ts, canonical_only=False)

                    ss.find_junctions()

                    descr_2d = self._describe_2d(ss, legacy_layout)
                    # Junction locations are emitted as is in every version.
                    descr_2d['junctions'] = [{
                        'description': junction['description'],
                        'location': junction['location']
                    } for junction in ss.junctions]

                    result.append({
                        "2D": descr_2d,
                        "3D": self._describe_3d(ts)
                    })
                self._log_done(log)
                self.write(json_encode(result))

    @staticmethod
    def _log_done(log):
        """Set the request's log document to 'done' and refresh its date."""
        logs_db['webservices'].update({'_id': log['_id']}, {
            '$set': {
                'status': 'done',
                'date': datetime.datetime.now()
            }
        })

    @staticmethod
    def _describe_header(structure):
        """Return the '_id', 'name', 'source' and 'rna' fields of a 2D or 3D."""
        return {
            '_id': structure._id,
            'name': structure.name,
            'source': structure.source,
            'rna': {
                'name': structure.rna.name,
                'sequence': structure.rna.sequence,
                'source': structure.rna.source,
                '_id': structure.rna._id
            }
        }

    @classmethod
    def _describe_2d(cls, ss, legacy_layout):
        """Describe the helices, single strands and tertiary interactions of ss."""

        def locate(location):
            # The version-1 layout wraps every location under an 'ends' key.
            return {'ends': location} if legacy_layout else location

        def describe_interaction(interaction):
            return {
                'orientation': interaction['orientation'],
                'edge1': interaction['edge1'],
                'edge2': interaction['edge2'],
                'location': locate(interaction['location'])
            }

        descr = cls._describe_header(ss)

        helices_descr = []
        for helix in ss.helices:
            helix_descr = {
                'name': helix['name'],
                'location': locate(helix['location'])
            }
            # 'interactions' is only emitted for helices that carry the key.
            if 'interactions' in helix:
                helix_descr['interactions'] = [
                    describe_interaction(interaction)
                    for interaction in helix['interactions']
                ]
            helices_descr.append(helix_descr)
        descr['helices'] = helices_descr

        descr['singleStrands'] = [{
            'name': single_strand['name'],
            'location': locate(single_strand['location'])
        } for single_strand in ss.single_strands]

        descr['tertiaryInteractions'] = [
            describe_interaction(tertiary_interaction)
            for tertiary_interaction in ss.tertiary_interactions
        ]
        return descr

    @classmethod
    def _describe_3d(cls, ts):
        """Describe ts with the atoms of each residue, keyed by str(residue key)."""
        descr = cls._describe_header(ts)

        residues_descr = {}
        for key in sorted(ts.residues):  # the absolute positions are sorted
            residues_descr[str(key)] = {
                'atoms': [{
                    'name': atom['name'],
                    'coords': atom['coords']
                } for atom in ts.residues[key]['atoms']]
            }

        descr['residues'] = residues_descr
        return descr
from pyrna.computations import Cmalign, Rnaview
from bson.objectid import ObjectId
import os

# Script: for every Rfam family row that names a PDB structure, locate the
# referenced chain, annotate it with RNAVIEW, align it with cmalign against
# the family and make sure the family's output directories exist.
pdb = PDB()
cmalign = Cmalign()
rnaview = Rnaview()
rfam = Rfam(cache_dir = "/home/fjossinet/tmp/Rfam")

families_with_structures = rfam.get_families_with_structures()

for index, row in families_with_structures.iterrows():

    rfam_id = row['rfam_id']

    tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id']))

    reference_rna = None
    ts = None

    # Keep the first parsed structure whose RNA name matches the row's chain.
    for tertiary_structure in tertiary_structures:
        if tertiary_structure.rna.name == row['chain_name']:
            ts = tertiary_structure
            reference_rna = ts.rna
            break

    if ts:
        secondary_structure, tertiary_structure = rnaview.annotate(tertiary_structure = ts)
        rnas, orgs, consensus_2d = cmalign.align([reference_rna], rfam_id = rfam_id, rfam = rfam)
        # os.mkdir raises when the directory already exists, which would abort
        # the whole loop on a rerun, or on a second row carrying the same
        # rfam_id (presumably possible since rows are per structure -- verify).
        # Create the tree only when it is missing.
        molecules_dir = "/home/fjossinet/tmp/%s/Molecules"%rfam_id
        if not os.path.isdir(molecules_dir):
            os.makedirs(molecules_dir)
Beispiel #7
0
from pyrna.computations import Cmalign, Rnaview
from bson.objectid import ObjectId
import os

pdb = PDB()
cmalign = Cmalign()
rnaview = Rnaview()
rfam = Rfam(cache_dir="/home/fjossinet/tmp/Rfam")

families_with_structures = rfam.get_families_with_structures()

for index, row in families_with_structures.iterrows():

    rfam_id = row['rfam_id']

    tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id']))

    reference_rna = None
    ts = None

    for tertiary_structure in tertiary_structures:
        if tertiary_structure.rna.name == row['chain_name']:
            ts = tertiary_structure
            reference_rna = ts.rna
            break

    if ts:
        secondary_structure, tertiary_structure = rnaview.annotate(
            tertiary_structure=ts)
        rnas, orgs, consensus_2d = cmalign.align([reference_rna],
                                                 rfam_id=rfam_id,
    def post(self):
        """Run the requested RNA tool on the posted input and write its result.

        Request arguments read: 'data', 'tool', 'version', 'pdbid' and, for
        tool 'rnasubopt', 'random_sample' (default 20).

        Dispatch:
          * data starting with '>' is parsed with parse_fasta():
              - one molecule: tools 'rnafold', 'contrafold' and 'rnasubopt'
                fold it; the JSON description of the single structure
                (rnafold, contrafold) or the JSON list of descriptions (any
                other tool, possibly empty) is written;
              - two or more molecules with tool 'mlocarna': the alignment is
                written through to_clustalw().
          * tool 'rnalifold' with data starting with 'CLUSTAL': the output of
            RnaAlifold().align(data) is written.
          * tool 'rnaview': the structures parsed from PDB entry `pdbid`, or
            from `data`, are annotated and written as a JSON list of
            {"2D": ..., "3D": ...} descriptions. With neither input the list
            is empty.

        In the version-1 layout every location is wrapped as
        {'ends': location}; any other version emits the location as is.

        A log document with status 'running' is inserted into
        logs_db['webservices'] on entry and set to 'done' right before a
        result is produced; requests matching no branch stay 'running'.
        """
        data = self.get_argument('data', default = None)
        tool = self.get_argument('tool', default = None)
        version = self.get_argument('version', default = 1)
        pdbid = self.get_argument('pdbid', default = None)
        # NOTE(review): output is never read from the request, so the 'rnaml'
        # branch below cannot run as written. Presumably it was meant to come
        # from an 'output' argument -- confirm before wiring it up.
        output = None
        # Only the default is the int 1. A version supplied in the request is
        # presumably a string (TODO confirm against the handler framework), so
        # both spellings select the version-1 layout.
        legacy_layout = version in (1, '1')

        log = {
            'path': self.request.uri,
            'tool': tool,
            'ip': self.request.remote_ip,
            'method': self.request.method,
            'date': datetime.datetime.now(),
            'status': 'running'
        }

        # _log_done() later relies on log['_id'] being present after this call.
        logs_db['webservices'].insert(log)

        if data and data.startswith('>'): #2D prediction
            rnas = parse_fasta(data)
            if len(rnas) == 1: #single molecule prediction (MFE,...)
                rna = rnas[0]
                secondary_structures = []
                if tool == 'rnafold':
                    secondary_structures.append(base_pairs_to_secondary_structure(rna, Rnafold().fold(rna)))
                elif tool == 'contrafold':
                    secondary_structures.append(base_pairs_to_secondary_structure(rna, Contrafold().fold(rna)))
                elif tool == 'rnasubopt':
                    random_sample = int(self.get_argument('random_sample', default = 20))
                    for base_pairs in Rnasubopt().fold(rna, random_sample = random_sample):
                        secondary_structures.append(base_pairs_to_secondary_structure(rna, base_pairs))

                result = [self._describe_2d(ss, legacy_layout) for ss in secondary_structures]
                self._log_done(log)
                if tool == 'rnafold' or tool == 'contrafold':
                    # These tools yield exactly one structure: unwrap it.
                    self.write(json_encode(result[0]))
                else:
                    self.write(json_encode(result))
            elif len(rnas) >= 2: #structural alignment
                if tool == 'mlocarna':
                    aligned_molecules, consensus2D = Mlocarna().align(rnas)
                    self._log_done(log)
                    self.write(to_clustalw(consensus2D, aligned_molecules))
        elif tool == 'rnalifold' and data and data.startswith('CLUSTAL'): #computation of consensus structure from sequence alignment
            self._log_done(log)
            self.write(RnaAlifold().align(data))
        elif tool == 'rnaview': #3D annotation
            from pyrna.db import PDB
            rnaview = Rnaview()

            if output == 'rnaml':
                if pdbid:
                    self.write(rnaview.annotate(pdb_content = PDB().get_entry(pdbid), raw_output = True))
                elif data:
                    self.write(rnaview.annotate(pdb_content = data, raw_output = True))

            else:
                # Start from an empty list so that a request carrying neither
                # pdbid nor data answers with [] (as the 2D branch does for an
                # unknown tool) instead of failing on an unbound name.
                tertiary_structures = []
                if pdbid:
                    tertiary_structures = parse_pdb(PDB().get_entry(pdbid))
                elif data:
                    tertiary_structures = parse_pdb(data)

                result = []

                for ts in tertiary_structures:
                    (ss, ts) = rnaview.annotate(ts, canonical_only = False)

                    ss.find_junctions()

                    descr_2d = self._describe_2d(ss, legacy_layout)
                    # Junction locations are emitted as is in every version.
                    descr_2d['junctions'] = [{
                        'description': junction['description'],
                        'location': junction['location']
                    } for junction in ss.junctions]

                    result.append({"2D": descr_2d, "3D": self._describe_3d(ts)})
                self._log_done(log)
                self.write(json_encode(result))

    @staticmethod
    def _log_done(log):
        """Set the request's log document to 'done' and refresh its date."""
        logs_db['webservices'].update({ '_id': log['_id'] }, {'$set': { 'status' : 'done', 'date':datetime.datetime.now()}})

    @staticmethod
    def _describe_header(structure):
        """Return the '_id', 'name', 'source' and 'rna' fields of a 2D or 3D."""
        return {
            '_id': structure._id,
            'name': structure.name,
            'source': structure.source,
            'rna': {
                'name': structure.rna.name,
                'sequence': structure.rna.sequence,
                'source': structure.rna.source,
                '_id': structure.rna._id
            }
        }

    @classmethod
    def _describe_2d(cls, ss, legacy_layout):
        """Describe the helices, single strands and tertiary interactions of ss."""

        def locate(location):
            # The version-1 layout wraps every location under an 'ends' key.
            return {'ends': location} if legacy_layout else location

        def describe_interaction(interaction):
            return {
                'orientation': interaction['orientation'],
                'edge1': interaction['edge1'],
                'edge2': interaction['edge2'],
                'location': locate(interaction['location'])
            }

        descr = cls._describe_header(ss)

        helices_descr = []
        for helix in ss.helices:
            helix_descr = {
                'name': helix['name'],
                'location': locate(helix['location'])
            }
            # 'interactions' is only emitted for helices that carry the key.
            if 'interactions' in helix:
                helix_descr['interactions'] = [describe_interaction(interaction) for interaction in helix['interactions']]
            helices_descr.append(helix_descr)
        descr['helices'] = helices_descr

        descr['singleStrands'] = [{
            'name': single_strand['name'],
            'location': locate(single_strand['location'])
        } for single_strand in ss.single_strands]

        descr['tertiaryInteractions'] = [describe_interaction(tertiary_interaction) for tertiary_interaction in ss.tertiary_interactions]
        return descr

    @classmethod
    def _describe_3d(cls, ts):
        """Describe ts with the atoms of each residue, keyed by str(residue key)."""
        descr = cls._describe_header(ts)

        residues_descr = {}
        for key in sorted(ts.residues): #the absolute positions are sorted
            residues_descr[str(key)] = {
                'atoms': [{
                    'name': atom['name'],
                    'coords': atom['coords']
                } for atom in ts.residues[key]['atoms']]
            }

        descr['residues'] = residues_descr
        return descr