Esempio n. 1
0
def import_3Ds(db_host = 'localhost', db_port = 27017, rna3dhub = False, canonical_only = True, annotate = False, limit = 5000):
    """Import the 3D structures of RNA-only PDB entries into MongoDB.

    Opens the database "RNA3DHub" (rna3dhub true) or "PDB" (rna3dhub false)
    on the MongoDB server at db_host:db_port. When rna3dhub is false, the PDB
    is searched with the chain-type query below and every hit that has no
    document with source "db:pdb:<id>" in 'tertiaryStructures' yet is fetched,
    parsed and passed to save(), one call per parsed tertiary structure.

    NOTE(review): when rna3dhub is true, this function only opens the database
    and builds the Rnaview wrapper; it contains no import loop for that case.

    Parameters:
    - db_host, db_port: address of the MongoDB server.
    - rna3dhub: selects the target database (see above).
    - canonical_only: forwarded to rnaview.annotate().
    - annotate: when true, rnaview.annotate() is run on each structure before
      it is saved; otherwise the structure is saved with no secondary
      structure (None).
    - limit: forwarded to save().
    """
    client = MongoClient(db_host, db_port)
    db = client["RNA3DHub" if rna3dhub else "PDB"]
    rnaview = Rnaview()

    if not rna3dhub:
        pdb = PDB()
        # NOTE(review): the <description> text mentions a protein chain, but
        # the flags actually sent are containsProtein=N / containsRna=Y.
        query ="""<orgPdbQuery>
    <version>head</version>
    <queryType>org.pdb.query.simple.ChainTypeQuery</queryType>
    <description>Chain Type: there is a Protein and a RNA chain but not any DNA or Hybrid</description>
    <containsProtein>N</containsProtein>
    <containsDna>N</containsDna>
    <containsRna>Y</containsRna>
    <containsHybrid>N</containsHybrid>
  </orgPdbQuery>"""
        pdb_ids = pdb.query(query)
        print("%i 3Ds to process" % len(pdb_ids))

        for pdb_id in pdb_ids:
            # Skip entries that were already imported by a previous run.
            if db['tertiaryStructures'].find_one({'source': "db:pdb:%s" % pdb_id}):
                continue
            print("Recover %s" % pdb_id)
            for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                    save(db, ss, ts, pdb_id, limit)

                except Exception as e:
                    # Best effort: report the failure and still store the 3D
                    # structure, without any secondary structure.
                    print(e)
                    print("No annotation for %s" % pdb_id)
                    save(db, None, ts, pdb_id, limit)
def test():
    """Demo on PDB entry 1EHZ.

    Fetches and parses the entry, prints the base pairs derived from the
    Rnaview annotation of each parsed tertiary structure, then prints each
    RNA sequence followed by the result of Rnafold().fold() on it.
    """
    print("Recovering entry 1EHZ from Protein Databank...\n")
    pdb = PDB()
    # The structures are walked twice below; materialise them so the second
    # pass still sees them if parse_pdb hands back a one-shot iterator.
    tertiary_structures = list(parse_pdb(pdb.get_entry('1EHZ')))

    print("## 3D annotation ##\n")

    print("List of base-pairs computed with RNAVIEW:\n")

    # One wrapper instance is enough for all the structures.
    rnaview = Rnaview()
    for ts in tertiary_structures:
        # annotate() returns a (secondary, tertiary) pair; only the first
        # element is needed here.
        secondary_structure = rnaview.annotate(ts)[0]
        print(secondary_structure_to_base_pairs(secondary_structure, keep_tertiaries = True))

    print("\n## 2D prediction ##\n")

    for ts in tertiary_structures:
        print("RNA sequence from 1EHZ:\n")
        print(ts.rna.sequence)
        print("\nList of base-pairs computed with RNAfold (RNA Vienna Package):\n")
        print(Rnafold().fold(molecule=ts.rna))
                    save(db, ss, ts, pdb_id, limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
    else:
        pdb = PDB()
        rna3dHub = RNA3DHub()
        clusters = rna3dHub.get_clusters()
        print "%i 3Ds to process"%len(clusters)

        for cluster in clusters['pdb-ids']:
            if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%cluster[0]}):
                continue
            print "Recover %s"%cluster[0] #we use the first pdb_id in the list of ids making a cluster
            for ts in parsers.parse_pdb(pdb.get_entry(cluster[0])):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                    save(db, ss, ts, cluster[0], limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%cluster[0]

def save(db, secondary_structure, tertiary_structure, pdbId, limit):
    """Terminate the program once db['junctions'] holds `limit` documents.

    Parameters:
    - db: mapping-like database handle; only db['junctions'].count() is used.
    - secondary_structure, tertiary_structure, pdbId: accepted for the
      callers' sake but not used by the code in this block.
    - limit: number of junction documents at which the import stops.

    Raises SystemExit (through sys.exit()) when the limit is reached or
    exceeded; otherwise returns None.
    """
    if db['junctions'].count() >= limit:
        print("Limit of %i junctions reached" % limit)
        sys.exit()
Esempio n. 4
0
                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
                    save(db, None, ts, pdb_id, limit)
    else:
        pdb = PDB()
        rna3dHub = RNA3DHub()
        clusters = rna3dHub.get_clusters()
        print "%i 3Ds to process"%len(clusters)

        for cluster in clusters['pdb-ids']:
            pdb_id = cluster[0].split('|')[0]
            if db['tertiaryStructures'].find_one({'source':"db:pdb:%s"%pdb_id}):
                continue
            print "Recover %s"%pdb_id #we use the first pdb_id in the list of ids making a cluster
            for ts in parsers.parse_pdb(pdb.get_entry(pdb_id)):
                try:
                    ss = None
                    if annotate:
                        ss, ts = rnaview.annotate(ts, canonical_only = canonical_only)
                    save(db, ss, ts, pdb_id, limit)

                except Exception, e:
                    print e
                    print "No annotation for %s"%pdb_id
                    save(db, None, ts, pdb_id, limit)

def save(db, secondary_structure, tertiary_structure, pdbId, limit):
    """Terminate the program once db['junctions'] holds `limit` documents.

    Parameters:
    - db: mapping-like database handle; only db['junctions'].count() is used.
    - secondary_structure, tertiary_structure, pdbId: accepted for the
      callers' sake but not used by the code in this block.
    - limit: number of junction documents at which the import stops.

    Raises SystemExit (through sys.exit()) when the limit is reached or
    exceeded; otherwise returns None.
    """
    if db['junctions'].count() >= limit:
        print("Limit of %i junctions reached" % limit)
        sys.exit()
from pyrna.computations import Cmalign, Rnaview
from bson.objectid import ObjectId
import os

# Handles created once and reused for every row of the loop below.
pdb = PDB()
cmalign = Cmalign()
rnaview = Rnaview()
rfam = Rfam(cache_dir = "/home/fjossinet/tmp/Rfam")

families_with_structures = rfam.get_families_with_structures()

# Each row is read for its 'rfam_id', 'pdb_id' and 'chain_name' fields.
for index, row in families_with_structures.iterrows():

    rfam_id = row['rfam_id']

    tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id']))

    reference_rna = None
    ts = None

    # Keep the first parsed structure whose RNA name matches the row's chain.
    for tertiary_structure in tertiary_structures:
        if tertiary_structure.rna.name == row['chain_name']:
            ts = tertiary_structure
            reference_rna = ts.rna
            break

    if ts:
        secondary_structure, tertiary_structure = rnaview.annotate(tertiary_structure = ts)
        rnas, orgs, consensus_2d = cmalign.align([reference_rna], rfam_id = rfam_id, rfam = rfam)
        # os.mkdir raises OSError when the directory already exists (a second
        # run, or presumably another row with the same rfam_id), so only
        # create what is missing.
        family_dir = "/home/fjossinet/tmp/%s"%rfam_id
        for directory in (family_dir, "%s/Molecules"%family_dir):
            if not os.path.isdir(directory):
                os.mkdir(directory)
Esempio n. 6
0
from pyrna.computations import Cmalign, Rnaview
from bson.objectid import ObjectId
import os

# Handles created once at import time; pdb, rnaview and cmalign are used by
# the loop below.
pdb = PDB()
cmalign = Cmalign()
rnaview = Rnaview()
# The cache directory is a hard-coded, user-specific absolute path.
rfam = Rfam(cache_dir="/home/fjossinet/tmp/Rfam")

# Iterated row by row below; each row is read for 'rfam_id', 'pdb_id' and
# 'chain_name'.
families_with_structures = rfam.get_families_with_structures()

for index, row in families_with_structures.iterrows():

    rfam_id = row['rfam_id']

    tertiary_structures = parse_pdb(pdb.get_entry(row['pdb_id']))

    reference_rna = None
    ts = None

    for tertiary_structure in tertiary_structures:
        if tertiary_structure.rna.name == row['chain_name']:
            ts = tertiary_structure
            reference_rna = ts.rna
            break

    if ts:
        secondary_structure, tertiary_structure = rnaview.annotate(
            tertiary_structure=ts)
        rnas, orgs, consensus_2d = cmalign.align([reference_rna],
                                                 rfam_id=rfam_id,