コード例 #1
0
def get_pdb_sstruct_info(pdb):
    """
    Map the residues of a PDB entry to their secondary structure serial.

    All rows of ``pdb_dev.res_map`` belonging to the upper-cased *pdb* code
    are fetched. The returned dictionary is keyed by the leading columns of
    each row, ``(pdb_chain_id, pdb_res_name, pdb_res_num, pdb_ins_code)``,
    and holds the trailing ``sstruct_serial`` column as value.
    """
    query = text("""
                    SELECT  pdb_chain_id, pdb_res_name, pdb_res_num, pdb_ins_code, sstruct_serial
                    FROM    pdb_dev.res_map
                    WHERE   pdb = :pdb
                    """)

    rows = engine.execute(query, pdb=pdb.upper()).fetchall()

    # the row values come back as a list, which is not hashable, hence the
    # conversion to a tuple before slicing off the key
    records = (tuple(row.values()) for row in rows)

    # everything but the last column identifies the residue, the last column
    # is the secondary structure serial
    sstruct_info = {record[:-1]: record[-1] for record in records}

    message = "{0} protein fragments were identified through SIFTS."
    app.log.debug(message.format(len(sstruct_info)))

    return sstruct_info
コード例 #2
0
def get_pdb_ligand_info(pdb):
    """
    Map the ligands of a PDB entry to their entity type bit mask.

    The query combines two sources for the upper-cased *pdb* code: rows of
    ``pdb_dev.ligands`` with a blank insertion code (entity type 2) and rows
    of ``pdb_dev.peptide_ligands`` (entity type 34, with NULL ``het_id`` and
    ``res_num``).

    Returns a dictionary keyed by ``(pdb_chain_id, het_id, res_num,
    ins_code)`` with the entity type bit mask as value. A falsy insertion
    code is replaced by a single space in the key.
    """
    statement = text("""
                SELECT      pdb_chain_id, het_id, res_num, ins_code, 2 as entity_type_bm
                FROM        pdb_dev.ligands
                WHERE       pdb = :pdb AND (ins_code = '' OR ins_code = ' ') -- AND is_observed = true
                UNION
                SELECT      pdb_chain_id, NULL, NULL, ' ', 34 as entity_type_bm
                FROM        pdb_dev.peptide_ligands
                WHERE       pdb = :pdb
                ORDER BY    1, 3, 4
               """)

    rows = engine.execute(statement, pdb=pdb.upper()).fetchall()

    # map the pdb identifier of each residue to its entity type
    ligands = {}

    for row in rows:
        ins_code = row.ins_code or ' '
        identifier = (row.pdb_chain_id, row.het_id, row.res_num, ins_code)
        ligands[identifier] = row.entity_type_bm

    summary = "structure {0} contains {1} ligands according to mmCIF:"
    app.log.debug(summary.format(pdb, len(ligands)))

    # only chain, het id and residue number are printed; the insertion code
    # at the end of the key is ignored by the format string
    for identifier in ligands:
        app.log.debug("    {0} {1} {2}".format(*identifier))

    return ligands
コード例 #3
0
def get_credo_pdbs(generator=True):
    """
    Fetch the PDB codes of all structures currently stored in CREDO.

    Important for doing incremental updates. The codes are read from
    ``credo.structures`` in ascending order. With *generator* set (the
    default) a generator yielding the bare ``pdb`` values is returned;
    otherwise the fetched result rows are handed back unchanged.
    """
    query = text("SELECT pdb FROM credo.structures ORDER BY 1")
    rows = engine.execute(query).fetchall()

    if not generator:
        return rows

    return (row.pdb for row in rows)
コード例 #4
0
def get_pdb_polymer_info(pdb):
    """
    Build the polymer residue mapping of a PDB entry.

    Joins ``pdbx_poly_seq_scheme`` with ``entity_poly`` in the mmCIF schema
    named by the ``database``/``mmcif`` configuration entry and looks up the
    upper-cased *pdb* code.

    Returns a dictionary keyed by ``(pdb_chain_id, res_name, res_num,
    ins_code)`` whose values are the entity type bit mask computed by the
    query: 32 for polypeptides, 16 for DNA, 8 for RNA, 24 for DNA/RNA
    hybrids, 4 for polysaccharides and 0 for anything else. Empty insertion
    codes are turned into a single space by the query itself.
    """
    schema = app.config.get('database', 'mmcif')

    statement = text("""
                    SELECT      pdb_strand_id as pdb_chain_id, pdb_mon_id as res_name,
                                pdb_seq_num::integer AS res_num,
                                CASE
                                    WHEN pdb_ins_code = '' THEN ' '
                                    ELSE pdb_ins_code
                                END AS ins_code,
                                CASE
                                    -- PROTEIN
                                    WHEN ep.type = 'polypeptide(L)' OR ep.type = 'polypeptide(D)' THEN 32
                                    -- DNA
                                    WHEN ep.type = 'polydeoxyribonucleotide' THEN 16
                                    -- RNA
                                    WHEN ep.type = 'polyribonucleotide' THEN 8
                                    -- DNA/RNA HYBRID
                                    WHEN ep.type = 'polydeoxyribonucleotide/polyribonucleotide hybrid' THEN 24
                                    -- POLYSACCHARIDE
                                    WHEN ep.type = 'polysaccharide(D)' THEN 4
                                    ELSE 0
                                END AS entity_type_bm
                    FROM        {mmcif}.pdbx_poly_seq_scheme p
                    JOIN        {mmcif}.entity_poly ep ON p.structure_id = ep.structure_id AND p.entity_id = ep.entity_id
                    WHERE       p.pdb_mon_id != '' AND p.Structure_ID = :pdb
                    ORDER BY    1, 3, 4
                     """.format(mmcif=schema))

    rows = engine.execute(statement, pdb=pdb.upper()).fetchall()

    # map the pdb identifier of each residue to its entity type
    residues = {
        (row.pdb_chain_id, row.res_name, row.res_num, row.ins_code):
        row.entity_type_bm
        for row in rows
    }

    message = "structure contains {0} polymer residues according to mmCIF."
    app.log.debug(message.format(len(residues)))

    return residues
コード例 #5
0
ファイル: mmcif.py プロジェクト: tlb-lab/credovi
def get_current_pdbs(args):
    """
    Return the identifiers of all current, non-banned PDB entries.

    Every structure not in this list is most likely obsolete. The
    identifiers are read in ascending order from the ``entry`` table of the
    configured mmCIF schema, leaving out those listed in ``pdb_dev.banned``.

    The result is a generator; if ``args.offset`` or ``args.limit`` are set
    it is passed through ``apply_offset`` (with the upper-cased offset) and
    ``apply_limit`` respectively.
    """
    schema = app.config.get('database', 'mmcif')

    query = text("""
                       SELECT structure_id as pdb
                         FROM {mmcif}.entry e
                    LEFT JOIN pdb_dev.banned b ON e.structure_id = b.pdb
                        WHERE b.pdb IS NULL
                     ORDER BY 1
                     """.format(mmcif=schema))

    # echo is forced off because SQLAlchemy prints to STDOUT and the output
    # would get mixed with the PDB list
    engine.echo = False
    rows = engine.execute(query).fetchall()

    pdbs = (row['pdb'] for row in rows)

    if args.offset:
        pdbs = apply_offset(pdbs, args.offset.upper())

    if args.limit:
        pdbs = apply_limit(pdbs, args.limit)

    return pdbs
コード例 #6
0
def get_biomt(pdb):
    """
    Fetch the biological assembly transformations (BIOMT) of a PDB entry.

    Returns a tuple ``(biomt, is_monomeric)``.

    ``biomt`` is a nested dictionary of the form
    ``{assembly_serial: {pdb_chain_id: {operation_serial: details}}}`` where
    the chain identifier is a string, operation serials start at 1 for every
    chain and ``details`` holds the keys ``rotation``, ``translation`` (both
    wrapped in ``OEFloatArray``) and ``is_at_identity``. It is empty if the
    entry has no BIOMT rows or if any assembly has more than 26 chains.

    ``is_monomeric`` is the flag of the last assembly that was inspected
    (``False`` if there were no rows at all). Processing stops at the first
    monomeric assembly, whose entry in ``biomt`` is left empty.
    """
    statement = text("""
                        SELECT assembly_serial, assembly_size, is_monomeric,
                               pdb_chain_id, rotation, translation, is_at_identity
                          FROM pdb_dev.biomt b
                          JOIN pdb_dev.biomt_ops o USING(biomt_id)
                         WHERE pdb = :pdb
                      ORDER BY 1,3
                     """)

    result = engine.execute(statement, pdb=pdb.upper()).fetchall()

    biomt = {}
    is_monomeric = False

    # iterate through assemblies
    for (assembly_serial, assembly_size,
         is_monomeric), rows in groupby(result, key=itemgetter(0, 1, 2)):
        chains = biomt[assembly_serial] = {}

        # do not return data for large assemblies / asu will be used instead
        if assembly_size > 26:
            app.log.warn(
                "one of the predicted assemblies contains {} chains - "
                "the asymmetric unit will be used instead.".format(
                    assembly_size))
            biomt = {}
            break

        # the complete ASU is monomeric and has to be split into individual chains
        if is_monomeric:
            break

        # The query does not order by chain, so the rows of one chain are not
        # guaranteed to be adjacent. Operations are therefore accumulated per
        # chain instead of being grouped by adjacency, which would reset a
        # chain's operations every time the chain shows up again.
        for row in rows:
            # the same string key is used for creating and filling the entry
            chain_key = str(row[3])
            rotation, translation, is_at_identity = row[4:]

            details = {
                'rotation': OEFloatArray(rotation),
                'translation': OEFloatArray(translation),
                'is_at_identity': is_at_identity
            }

            # operation serials are numbered consecutively per chain from 1
            operations = chains.setdefault(chain_key, {})
            operations[len(operations) + 1] = details

    # debug assembly information; the highest assembly serial is reported
    if biomt:
        app.log.debug("BIOMT contains {0} assembly/assemblies.".format(
            max(biomt.keys())))

    # PDB entry does not have a REMARK 350
    else:
        app.log.debug("NO REMARK 350 found.")

    return biomt, is_monomeric
コード例 #7
0
ファイル: fuzcav.py プロジェクト: tlb-lab/credovi
def do(controller):
    """
    Generate FuzCav fingerprints for ligand binding sites and store them.

    For every ligand with at least 7 heavy atoms that is not clashing, the
    C-alpha and side chain representative atoms of its standard binding site
    peptides are fetched, turned into two fingerprints with
    ``fuzcav.make_fp`` and inserted into ``binding_site_fuzcav``. Binding
    sites with fewer than 14 such atoms are skipped.

    The following attributes of ``controller.pargs`` are read:

    * ``incremental`` - only process ligands whose identifier is greater than
      the largest one already present in ``binding_site_fuzcav``.
      NOTE(review): with an empty table the maximum is presumably NULL and
      the comparison would then match no ligand at all - confirm.
    * ``progressbar`` - display a progress bar while iterating.
    * ``dry_run`` - compute the fingerprints but do not insert them.

    The database session is closed even if an error occurs on the way.
    """
    # timer to clock functions and parts of the program
    timer = Timer()
    timer.start("app")

    # get the command line arguments and options
    args = controller.pargs

    insert = binding_site_fuzcav.insert()
    tracker = fuzcav.get_tracker()

    # get the fuzcav side chain representative table from the credoscript metadata
    metadata.reflect(schema='bio', only=('fuzcav_rep_sc_atoms', ))
    fuzcav_rep_sc_atoms = Table('bio.fuzcav_rep_sc_atoms',
                                metadata,
                                autoload=True)

    timer.start()

    session = Session()

    # the session has to be released no matter how the processing ends
    try:
        # get all ligands that have at least 7 heavy atoms and no clashes
        query = session.query(Ligand.ligand_id, Ligand.biomolecule_id)
        query = query.filter(
            and_(Ligand.num_hvy_atoms >= 7, Ligand.is_clashing == False))

        if args.incremental:

            # subquery to get the current max ligand_id from the binding_site_fuzcav table
            sq = session.query(
                func.max(binding_site_fuzcav.c.ligand_id).label(
                    'ligand_id')).subquery('sq')

            # only include new ligands
            query = query.filter(Ligand.ligand_id > sq.c.ligand_id)

        ligand_ids = query.order_by(Ligand.ligand_id).all()

        # debug how much time it took to get the ligand identifiers
        app.log.debug(
            "all new ligand identifiers retrieved in {0:.2f} seconds.".format(
                timer.elapsed()))

        # base query for the atoms of standard peptides that are either the
        # C-alpha or listed as side chain representative for their residue;
        # it is narrowed down to a single ligand inside the loop
        query = BindingSiteResidue.query.join('Peptide', 'Atoms')
        query = query.outerjoin(
            fuzcav_rep_sc_atoms,
            and_(fuzcav_rep_sc_atoms.c.res_name == Peptide.res_name,
                 fuzcav_rep_sc_atoms.c.atom_name == Atom.atom_name))
        query = query.filter(
            and_(
                Peptide.is_non_std == False,
                or_(Atom.atom_name == 'CA',
                    fuzcav_rep_sc_atoms.c.atom_name != None)))
        query = query.with_entities(Peptide.res_name, Atom)

        if args.progressbar:
            widgets = [
                'Binding Sites: ',
                SimpleProgress(), ' ',
                Percentage(),
                Bar()
            ]
            bar = ProgressBar(widgets=widgets,
                              maxval=len(ligand_ids)).start()

        # iterate through ligands
        for counter, row in enumerate(ligand_ids, 1):
            if args.progressbar:
                bar.update(counter)

            ligand_id, biomolecule_id = row.ligand_id, row.biomolecule_id

            timer.start()

            # get all the fuzcav atoms (either CA or representative)
            # important to use the proper atom partition!
            atoms = query.filter(
                and_(BindingSiteResidue.ligand_id == ligand_id,
                     Atom.biomolecule_id == biomolecule_id)).all()

            # debug how much time it took to get the atoms
            app.log.debug(
                "all FuzCav atoms retrieved in {0:.2f} seconds.".format(
                    timer.elapsed()))

            # ignore binding sites with too few FuzCav atoms
            if len(atoms) < 14:
                app.log.debug("Ligand {} has only {} FuzCav atoms and will be "
                              "ignored.".format(ligand_id, len(atoms)))
                continue

            # get the calpha atom and its features for each residue
            calphas = ((np.array(atom.coords,
                                 dtype=float), (fuzcav.FEATURES[res_name]))
                       for res_name, atom in atoms if atom.atom_name == 'CA')

            # get the representative atom and its features for each residue
            representatives = (
                (np.array(atom.coords, dtype=float),
                 (fuzcav.FEATURES[res_name]))
                for res_name, atom in atoms
                if atom.atom_name == fuzcav.REPRESENTATIVES[res_name])

            timer.start()

            calphafp = fuzcav.make_fp(calphas, tracker)
            repfp = fuzcav.make_fp(representatives, tracker)

            # debug how much time it took to generate the fingerprints
            app.log.debug("fingerprints generated in {0:.2f} seconds.".format(
                timer.elapsed()))

            # insert the fingerprints into the table
            if not args.dry_run:
                engine.execute(insert,
                               ligand_id=ligand_id,
                               calphafp=calphafp.tolist(),
                               repfp=repfp.tolist())

        # finish the optional progress bar
        if args.progressbar:
            bar.finish()

    finally:
        session.close()