Example #1
0
# Primary key on biomolecule_id, declared deferrable and initially deferred.
PrimaryKeyConstraint(
    biomolecules.c.biomolecule_id,
    deferrable=True,
    initially='deferred')

# Unique index over the (structure_id, assembly_serial) column pair.
Index(
    'idx_biomolecules_structures_id',
    biomolecules.c.structure_id,
    biomolecules.c.assembly_serial,
    unique=True)

# Index on the path column, requested with the PostgreSQL 'gist' method.
Index(
    'idx_biomolecules_path',
    biomolecules.c.path,
    postgresql_using='gist')

# Descriptions passed to comment_on_table_elements() together with the
# biomolecules table: one text for the table, one per column name.
comments = {
    "table": (
        "Stores the biological assemblies that are generated from an "
        "asymmetric unit."
    ),
    "columns": {
        "biomolecule_id": "Primary key.",
        "structure_id": "Primary key of the parent structure.",
        "path": "ptree path in the form PDB/assembly serial number.",
        "assembly_serial": (
            "Serial number of the assembly. Can be > 1 if asymmetric unit "
            "contains more than one biological assembly."
        ),
        "assembly_type": "Assembly type of the biomolecule, e.g. Monomeric.",
        "conformational_state_bm": (
            "Bit mask of the conformational state of the biomolecule."
        ),
        "structural_interaction_bm": (
            "Bit mask of the kinds of structural interactions that are "
            "present."
        ),
        "num_chains": "Number of chains in biological assembly.",
        "num_ligands": "Number of ligands in biological assembly.",
        "num_atoms": "Number of atoms in biological assembly.",
    },
}

# Pass the biomolecules table and its description dict to the project helper.
# NOTE(review): presumably this attaches the texts as table/column comments in
# the database -- confirm against the helper's definition.
comment_on_table_elements(biomolecules, comments)
Example #2
0
# Descriptions passed to comment_on_table_elements() together with the
# pi_groups table: one text for the table, one per documented column.
pi_groups_comments = {
    "table":
    "Contains the non-ring pi groups found in structures.",
    "columns": {
        "pi_id":
        "Primary Key of the pi system.",
        "biomolecule_id":
        "Foreign key of the parent biomolecule.",
        "pi_serial":
        "Pi system serial number inside the parent biomolecule.",
        # Disabled entry, kept as found:
        # "pi_number": "Number of the pi system inside the residue it is part of.",
        "size":
        "Number of atoms in the pi group.",
        "centroid":
        "The Centroid of the pi group as vector3d type.",
        "normal":
        "The normal of the pi group as vector3d type.",
        # Disabled entry, kept as found:
        # "is_hetero_aromatic": "True if the aromatic ring contains atoms other than carbon."
    }
}

# Pass the pi_groups table and its description dict to the project helper.
# NOTE(review): presumably this attaches the texts as table/column comments in
# the database -- confirm against the helper's definition.
comment_on_table_elements(pi_groups, pi_groups_comments)

## table to hold pi group atoms
# Partitioning parameters read from the application configuration.
# NOTE(review): the two arguments look like (section, option) -- confirm
# against the app.config API, including the type of the returned values.
CURRENT_BIOMOL_MAX = app.config.get('schema', 'current_biomol_max')
PI_GROUP_PARTITION_SIZE = app.config.get(
    'schema', 'pi_group_partition_size')  # In terms of biomolecules

# Template for a rule on {schema}.{master_table}: an INSERT whose
# biomolecule_id satisfies part_bound_low < biomolecule_id <= part_bound_high
# is carried out on {schema}.{table} instead. The brace placeholders are
# presumably filled with str.format(); the call site is not visible here.
PI_GROUP_INS_RULE_DDL = """
                        CREATE OR REPLACE RULE {rule} AS
                            ON INSERT TO {schema}.{master_table}
                        WHERE (biomolecule_id > {part_bound_low} AND biomolecule_id <= {part_bound_high})
                        DO INSTEAD INSERT INTO {schema}.{table} VALUES (NEW.*)
                        """

## pi group to residues map
pi_group_res = Table('pi_group_residues',
Example #3
0
        "pdb":
        "PDB 4-letter code.",
        "title":
        "Title of the PDB deposition.",
        "authors":
        "Concatenated string of author names.",
        "method":
        "Method used in the experiment (exptl.method).",
        "deposition_date":
        "Date the entry first entered the PDB database in the form yyyy-mm-dd. Taken from the PDB HEADER record (database_PDB_rev.date_original).",
        "modified_date":
        "Date the latest PDB revision took place. Taken from the REVDAT record (database_PDB_rev.date).",
        "resolution":
        "Smallest value for the interplanar spacings for the reflection dataused in the refinement in angstroms. This is called the highest resolution (refine.ls_d_res_high).",
        "r_factor":
        "Residual factor R for reflections* that satisfy the reflns.observed_criterion were included in the refinement (when the refinement included the calculation of a free R factor) (refine.ls_R_factor_R_work).",
        "r_free":
        "Residual factor R for reflections that satisfy the reflns.observed_criterion that were used as test reflections (i.e. were excluded from the refinement) (refine.ls_R_factor_R_free).",
        "ph":
        "pH at which the crystal was grown (exptl_crystal_grow.pH).",
        "dpi":
        "Diffraction-component Precision Index.",
        "dpi_theoretical_min":
        "Theoretical minimum of the DPI.",
        "num_biomolecules":
        "Number of biological assemblies in the asymmetric unit.",
    }
}

comment_on_table_elements(structures, comments)
Example #4
0
        "ring_serial":
        "Aromatic ring serial number inside the parent biomolecule.",
        "ring_number":
        "Number of the aromatic ring inside the residue it is part of.",
        "size":
        "Number of atoms in the aromatic ring.",
        "centroid":
        "The Centroid of the aromatic ring as vector3d type.",
        "normal":
        "The normal of the aromatic ring as vector3d type.",
        "is_hetero_aromatic":
        "True if the aromatic ring contains atoms other than carbon."
    }
}

comment_on_table_elements(aromatic_rings, aromatic_ring_comments)

# Table with its own integer primary key plus two NOT NULL integer columns
# holding an aromatic ring id and an atom id.
aromatic_ring_atoms = Table(
    'aromatic_ring_atoms',
    metadata,
    Column('aromatic_ring_atom_id', Integer, primary_key=True),
    Column('aromatic_ring_id', Integer, nullable=False),
    Column('atom_id', Integer, nullable=False),
    schema=schema)

# One single-column index for each of the two id columns.
Index(
    'idx_aromatic_ring_atoms_aromatic_ring_id',
    aromatic_ring_atoms.c.aromatic_ring_id)
Index(
    'idx_aromatic_ring_atoms_atom_id',
    aromatic_ring_atoms.c.atom_id)
Example #5
0
        "chain_end_id": "Primary key of the second chain.",
        "path":
        "ptree path in the form PDB/assembly serial number/I:PDB chain bgn ID-PDB chain end ID",
        "num_res_bgn":
        "Number of residues of the first chain that are part of this interface.",
        "num_res_end":
        "Number of residues of the second chain that are part of this interface.",
        "is_quaternary":
        "True if both chains are at identity, i.e. exist in the asymmetric unit.",
        "has_mod_res":
        "True if at least one modified residue if part of the interface.",
        "has_missing_atoms": "True if at least one residue has missing atoms."
    }
}

comment_on_table_elements(interfaces, interface_comments)

# Three integer columns, each with autoincrement switched off. No key is
# declared inside the Table() call itself.
interface_peptide_pairs = Table(
    'interface_peptide_pairs',
    metadata,
    Column('interface_id', Integer, autoincrement=False),
    Column('residue_bgn_id', Integer, autoincrement=False),
    Column('residue_end_id', Integer, autoincrement=False),
    schema=schema)

PrimaryKeyConstraint(interface_peptide_pairs.c.interface_id,
Example #6
0
    {
        "prot_fragment_id": "Primary key of the protein fragment.",
        "biomolecule_id": "Primary key of the biomolecule the secondary structure fragment belongs to.",
        "chain_id": "Primary key of the chain the secondary structure belongs to.",
        "path": "ptree",
        "sstruct_serial": "Secondary structure serial number inside the chain.",
        "sstruct": "Secondary structure DSSP type.",
        "fragment_size": "Number of residues that form the protein fragment.",
        "fragment_seq": "Fragment one-letter-code sequence.",
        "prot_fragment_nterm_id": "N-terminal protein fragment neighbour.",
        "prot_fragment_cterm_id": "C-terminal protein fragment neighbour.",
        "completeness": "Simple measure to quantify the completeness of the protein fragment: is is the number of its residues that are in CREDO divided by the size of the fragment."
    }
}

comment_on_table_elements(prot_fragments, prot_fragment_comments)

# Two NOT NULL integer columns: a protein fragment id and a residue id.
prot_fragment_residues = Table(
    'prot_fragment_residues',
    metadata,
    Column('prot_fragment_id', Integer, nullable=False),
    Column('residue_id', Integer, nullable=False),
    schema=schema)

# Composite primary key over both columns, deferrable and initially deferred.
PrimaryKeyConstraint(
    prot_fragment_residues.c.prot_fragment_id,
    prot_fragment_residues.c.residue_id,
    deferrable=True,
    initially='deferred')

# Additional index on residue_id alone.
Index(
    'idx_prot_fragment_residues_residue_id',
    prot_fragment_residues.c.residue_id)

prot_fragment_residue_comments = {
    "table": "Mapping between protein fragments and residues in CREDO - The database contains only residue interacting with other entities though, so some residues are missing.",
    "columns":
    {
Example #7
0
        "residue_id": "Primary key.",
        "biomolecule_id": "Primary key of the parent biomolecule.",
        "chain_id": "Primary key of the parent chain.",
        "path":
        "ptree path in the form PDB/assembly serial number/PDB chain ID/PDB residue name[residue insertion code]`PDB residue number.",
        "res_name": "Three-letter name of the residue.",
        "res_num": "PDB residue number.",
        "ins_code": "PDB insertion code.",
        "entity_type_bm":
        "Entity type bitmask (the bits are solvent, ligand, saccharide, rna, dna, protein).",
        "is_disordered": "True if the residue has disordered atoms.",
        "is_incomplete": "True if the residue has missing atoms."
    }
}

comment_on_table_elements(residues, residue_comments)

### create the table partitions for the different polymer residue types

# polypeptide residues
peptides = Table(
    'peptides',
    metadata,
    Column('residue_id', Integer, nullable=False),
    Column('biomolecule_id', Integer, nullable=False),
    Column('chain_id', Integer, nullable=False),
    Column('path', PTree, nullable=False),
    Column('res_name', String(3), nullable=False),
    Column('res_num', Integer, nullable=False),
    Column('ins_code', String(1), nullable=False),  # ' '
    Column('entity_type_bm', Integer, nullable=False),
Example #8
0
        "is_covalent": "",
        "is_vdw_clash": "",
        "is_vdw": "",
        "is_proximal": "",
        "is_hbond": "",
        "is_weak_hbond": "",
        "is_xbond": "",
        "is_ionic": "",
        "is_metal_complex": "",
        "is_aromatic": "",
        "is_hydrophobic": "",
        "is_carbonyl": ""
    }
}

comment_on_table_elements(contacts, comments)
create_partition_insert_trigger(contacts, CONTACTS_PARTITION_SIZE)

# create a new partition for every CONTACTS_PARTITION_SIZE biomolecules:
# consecutive values of this range are paired up below as the lower and upper
# bound of each partition
partitions = range(0, CURRENT_BIOMOL_MAX + CONTACTS_PARTITION_SIZE,
                   CONTACTS_PARTITION_SIZE)

for part_bound_low, part_bound_high in zip(partitions[:-1], partitions[1:]):
    tablename = 'contacts_biomol_le_{0}'.format(part_bound_high)
    rulename = tablename + '_insert'

    partition = Table(
        tablename,
        metadata,
        Column('contact_id', Integer, primary_key=True, nullable=False),
        Column('biomolecule_id', Integer, nullable=False),
Example #9
0
        "chain_seq":
        "Sequence of the polymer.",
        "chain_seq_md5":
        "MD5 Hash of the chain sequence.",
        "rotation":
        "Rotation matrix.",
        "translation":
        "Translation vector.",
        "is_at_identity":
        "True if the chain is at identity, i.e. no transformation was performed.",
        "has_disordered_regions":
        "True if the chain contains at least one disordered region (unobserved residues)."
    }
}

comment_on_table_elements(chains, chain_comments)

## chain subsets: polypeptides, oligonucleotides and polysaccharides

# polypeptides are chains where the type in mmCIF is polypeptide
polypeptides = Table('polypeptides',
                     metadata,
                     Column('chain_id', Integer, nullable=False),
                     Column('is_wildtype',
                            Boolean(create_constraint=False),
                            DefaultClause('false'),
                            nullable=False),
                     Column('is_human',
                            Boolean(create_constraint=False),
                            DefaultClause('false'),
                            nullable=False),
Example #10
0
        "is_acceptor": "",
        "is_aromatic": "",
        "is_weak_acceptor": "",
        "is_weak_donor": "",
        "is_hydrophobe": "",
        "is_metal": "",
        "is_pos_ionisable": "",
        "is_neg_ionisable": "",
        "is_xbond_donor": "",
        "is_xbond_acceptor": "",
        "is_carbonyl_oxygen": "",
        "is_carbonyl_carbon": ""
    }
}

comment_on_table_elements(atoms, comments)
create_partition_insert_trigger(atoms, ATOMS_PARTITION_SIZE)

# create a new partition for every ATOMS_PARTITION_SIZE biomolecules:
# consecutive values of this range are paired up below as the lower and upper
# bound of each partition
partitions = range(0, CURRENT_BIOMOL_MAX + ATOMS_PARTITION_SIZE,
                   ATOMS_PARTITION_SIZE)

for part_bound_low, part_bound_high in zip(partitions[:-1], partitions[1:]):
    tablename = 'atoms_biomol_le_{0}'.format(part_bound_high)
    rulename = tablename + '_insert'

    partition = Table(
        tablename,
        metadata,
        Column('atom_id', Integer, primary_key=True),
        Column('biomolecule_id', Integer, nullable=False),
Example #11
0
from credovi.util.sqlalchemy import comment_on_table_elements

# Cross-reference table: every column except the free-text description is
# declared NOT NULL.
xrefs = Table(
    'xrefs',
    metadata,
    Column('xref_id', Integer, nullable=False),
    Column('entity_type', String(12), nullable=False),
    Column('entity_id', Integer, nullable=False),
    Column('source', String(32), nullable=False),
    Column('xref', String(64), nullable=False),
    Column('description', Text),
    schema=schema)

# Primary key on xref_id, deferrable and initially deferred.
PrimaryKeyConstraint(
    xrefs.c.xref_id,
    deferrable=True,
    initially='deferred')

# Two-column indexes: (entity_type, entity_id) and (source, xref).
Index(
    'idx_xrefs_entity_type',
    xrefs.c.entity_type,
    xrefs.c.entity_id)
Index(
    'idx_xrefs_xref',
    xrefs.c.source,
    xrefs.c.xref)

# Descriptions passed to comment_on_table_elements() together with the xrefs
# table: one text for the table, one per column name.
comments = {
    "table": (
        "Contains cross references between entities in CREDO and "
        "external databases."
    ),
    "columns": {
        "xref_id":
        "Primary key of the cross reference.",
        "entity_id":
        "Primary key of the CREDO entity.",
        "entity_type":
        "Type of the CREDO entity, e.g. Ligand.",
        "source":
        "Name of the external database.",
        "xref":
        "cross reference in the external database.",
        "description":
        "Further description of the cross reference.",
    },
}

# Pass the xrefs table and its description dict to the helper imported from
# credovi.util.sqlalchemy.
# NOTE(review): presumably this attaches the texts as table/column comments in
# the database -- confirm against the helper's definition.
comment_on_table_elements(xrefs, comments)
Example #12
0
        "groove_id": "",
        "biomolecule_id": "Primary key of the parent biomolecule.",
        "chain_prot_id": "Primary key of the polypeptide chain.",
        "chain_nuc_id": "Primary key of the oligonucleotide chain.",
        "path":
        "ptree path in the form PDB/assembly serial number/G:PDB chain prot ID-PDB chain nuc ID",
        "num_res_prot": "Number of polypetide residues that are interacting.",
        "num_res_nuc":
        "Number of oligonucleotide residues that are interacting.",
        "nucleic_acid_type":
        "Type of the nucleic acid, either DNA/RNA or hybrid.",
        "has_missing_atoms": "True if at least one residue has missing atoms."
    }
}

comment_on_table_elements(grooves, groove_comments)

groove_residue_pairs = Table('groove_residue_pairs',
                             metadata,
                             Column('groove_id',
                                    Integer,
                                    autoincrement=False,
                                    primary_key=True),
                             Column('residue_prot_id',
                                    Integer,
                                    autoincrement=False,
                                    primary_key=True),
                             Column('residue_nuc_id',
                                    Integer,
                                    autoincrement=False,
                                    primary_key=True),