# Primary key on biomolecule_id, declared DEFERRABLE INITIALLY DEFERRED.
PrimaryKeyConstraint(
    biomolecules.c.biomolecule_id,
    deferrable=True,
    initially='deferred',
)

# Unique index: a (structure_id, assembly_serial) pair may occur only once.
Index(
    'idx_biomolecules_structures_id',
    biomolecules.c.structure_id,
    biomolecules.c.assembly_serial,
    unique=True,
)

# Index on the ptree path column, built with the PostgreSQL GiST access method.
Index(
    'idx_biomolecules_path',
    biomolecules.c.path,
    postgresql_using='gist',
)

# Human-readable descriptions of the table and of each of its columns.
comments = {
    "table": "Stores the biological assemblies that are generated from an asymmetric unit.",
    "columns": {
        "biomolecule_id": "Primary key.",
        "structure_id": "Primary key of the parent structure.",
        "path": "ptree path in the form PDB/assembly serial number.",
        "assembly_serial": "Serial number of the assembly. Can be > 1 if asymmetric unit contains more than one biological assembly.",
        "assembly_type": "Assembly type of the biomolecule, e.g. Monomeric.",
        "conformational_state_bm": "Bit mask of the conformational state of the biomolecule.",
        "structural_interaction_bm": "Bit mask of the kinds of structural interactions that are present.",
        "num_chains": "Number of chains in biological assembly.",
        "num_ligands": "Number of ligands in biological assembly.",
        "num_atoms": "Number of atoms in biological assembly."
    }
}

# Hand the descriptions to the project helper together with the table object.
# NOTE(review): presumably this registers them as database COMMENTs -- confirm
# against the helper's implementation.
comment_on_table_elements(biomolecules, comments)
pi_groups_comments = { "table": "Contains the non-ring pi groups found in structures.", "columns": { "pi_id": "Primary Key of the pi system.", "biomolecule_id": "Foreign key of the parent biomolecule.", "pi_serial": "Pi system serial number inside the parent biomolecule.", #"pi_number": "Number of the pi system inside the residue it is part of.", "size": "Number of atoms in the pi group.", "centroid": "The Centroid of the pi group as vector3d type.", "normal": "The normal of the pi group as vector3d type.", #"is_hetero_aromatic": "True if the aromatic ring contains atoms other than carbon." } } comment_on_table_elements(pi_groups, pi_groups_comments) ## table to hold pi group atoms CURRENT_BIOMOL_MAX = app.config.get('schema', 'current_biomol_max') PI_GROUP_PARTITION_SIZE = app.config.get( 'schema', 'pi_group_partition_size') # In terms of biomolecules PI_GROUP_INS_RULE_DDL = """ CREATE OR REPLACE RULE {rule} AS ON INSERT TO {schema}.{master_table} WHERE (biomolecule_id > {part_bound_low} AND biomolecule_id <= {part_bound_high}) DO INSTEAD INSERT INTO {schema}.{table} VALUES (NEW.*) """ ## pi group to residues map pi_group_res = Table('pi_group_residues',
"pdb": "PDB 4-letter code.", "title": "Title of the PDB deposition.", "authors": "Concatenated string of author names.", "method": "Method used in the experiment (exptl.method).", "deposition_date": "Date the entry first entered the PDB database in the form yyyy-mm-dd. Taken from the PDB HEADER record (database_PDB_rev.date_original).", "modified_date": "Date the latest PDB revision took place. Taken from the REVDAT record (database_PDB_rev.date).", "resolution": "Smallest value for the interplanar spacings for the reflection dataused in the refinement in angstroms. This is called the highest resolution (refine.ls_d_res_high).", "r_factor": "Residual factor R for reflections* that satisfy the reflns.observed_criterion were included in the refinement (when the refinement included the calculation of a free R factor) (refine.ls_R_factor_R_work).", "r_free": "Residual factor R for reflections that satisfy the reflns.observed_criterion that were used as test reflections (i.e. were excluded from the refinement) (refine.ls_R_factor_R_free).", "ph": "pH at which the crystal was grown (exptl_crystal_grow.pH).", "dpi": "Diffraction-component Precision Index.", "dpi_theoretical_min": "Theoretical minimum of the DPI.", "num_biomolecules": "Number of biological assemblies in the asymmetric unit.", } } comment_on_table_elements(structures, comments)
"ring_serial": "Aromatic ring serial number inside the parent biomolecule.", "ring_number": "Number of the aromatic ring inside the residue it is part of.", "size": "Number of atoms in the aromatic ring.", "centroid": "The Centroid of the aromatic ring as vector3d type.", "normal": "The normal of the aromatic ring as vector3d type.", "is_hetero_aromatic": "True if the aromatic ring contains atoms other than carbon." } } comment_on_table_elements(aromatic_rings, aromatic_ring_comments) aromatic_ring_atoms = Table('aromatic_ring_atoms', metadata, Column('aromatic_ring_atom_id', Integer, primary_key=True), Column('aromatic_ring_id', Integer, nullable=False), Column('atom_id', Integer, nullable=False), schema=schema) Index('idx_aromatic_ring_atoms_aromatic_ring_id', aromatic_ring_atoms.c.aromatic_ring_id) Index('idx_aromatic_ring_atoms_atom_id', aromatic_ring_atoms.c.atom_id)
"chain_end_id": "Primary key of the second chain.", "path": "ptree path in the form PDB/assembly serial number/I:PDB chain bgn ID-PDB chain end ID", "num_res_bgn": "Number of residues of the first chain that are part of this interface.", "num_res_end": "Number of residues of the second chain that are part of this interface.", "is_quaternary": "True if both chains are at identity, i.e. exist in the asymmetric unit.", "has_mod_res": "True if at least one modified residue if part of the interface.", "has_missing_atoms": "True if at least one residue has missing atoms." } } comment_on_table_elements(interfaces, interface_comments) interface_peptide_pairs = Table('interface_peptide_pairs', metadata, Column('interface_id', Integer, autoincrement=False), Column('residue_bgn_id', Integer, autoincrement=False), Column('residue_end_id', Integer, autoincrement=False), schema=schema) PrimaryKeyConstraint(interface_peptide_pairs.c.interface_id,
{ "prot_fragment_id": "Primary key of the protein fragment.", "biomolecule_id": "Primary key of the biomolecule the secondary structure fragment belongs to.", "chain_id": "Primary key of the chain the secondary structure belongs to.", "path": "ptree", "sstruct_serial": "Secondary structure serial number inside the chain.", "sstruct": "Secondary structure DSSP type.", "fragment_size": "Number of residues that form the protein fragment.", "fragment_seq": "Fragment one-letter-code sequence.", "prot_fragment_nterm_id": "N-terminal protein fragment neighbour.", "prot_fragment_cterm_id": "C-terminal protein fragment neighbour.", "completeness": "Simple measure to quantify the completeness of the protein fragment: is is the number of its residues that are in CREDO divided by the size of the fragment." } } comment_on_table_elements(prot_fragments, prot_fragment_comments) prot_fragment_residues = Table('prot_fragment_residues', metadata, Column('prot_fragment_id', Integer, nullable=False), Column('residue_id', Integer, nullable=False), schema=schema) PrimaryKeyConstraint(prot_fragment_residues.c.prot_fragment_id, prot_fragment_residues.c.residue_id, deferrable=True, initially='deferred') Index('idx_prot_fragment_residues_residue_id', prot_fragment_residues.c.residue_id) prot_fragment_residue_comments = { "table": "Mapping between protein fragments and residues in CREDO - The database contains only residue interacting with other entities though, so some residues are missing.", "columns": {
"residue_id": "Primary key.", "biomolecule_id": "Primary key of the parent biomolecule.", "chain_id": "Primary key of the parent chain.", "path": "ptree path in the form PDB/assembly serial number/PDB chain ID/PDB residue name[residue insertion code]`PDB residue number.", "res_name": "Three-letter name of the residue.", "res_num": "PDB residue number.", "ins_code": "PDB insertion code.", "entity_type_bm": "Entity type bitmask (the bits are solvent, ligand, saccharide, rna, dna, protein).", "is_disordered": "True if the residue has disordered atoms.", "is_incomplete": "True if the residue has missing atoms." } } comment_on_table_elements(residues, residue_comments) ### create the table partitions for the different polymer residue types # polypeptide residues peptides = Table( 'peptides', metadata, Column('residue_id', Integer, nullable=False), Column('biomolecule_id', Integer, nullable=False), Column('chain_id', Integer, nullable=False), Column('path', PTree, nullable=False), Column('res_name', String(3), nullable=False), Column('res_num', Integer, nullable=False), Column('ins_code', String(1), nullable=False), # ' ' Column('entity_type_bm', Integer, nullable=False),
"is_covalent": "", "is_vdw_clash": "", "is_vdw": "", "is_proximal": "", "is_hbond": "", "is_weak_hbond": "", "is_xbond": "", "is_ionic": "", "is_metal_complex": "", "is_aromatic": "", "is_hydrophobic": "", "is_carbonyl": "" } } comment_on_table_elements(contacts, comments) create_partition_insert_trigger(contacts, CONTACTS_PARTITION_SIZE) # create new paritions for every 5000 biomolecules partitions = range(0, CURRENT_BIOMOL_MAX + CONTACTS_PARTITION_SIZE, CONTACTS_PARTITION_SIZE) for part_bound_low, part_bound_high in zip(partitions[:-1], partitions[1:]): tablename = 'contacts_biomol_le_{0}'.format(part_bound_high) rulename = tablename + '_insert' partition = Table( tablename, metadata, Column('contact_id', Integer, primary_key=True, nullable=False), Column('biomolecule_id', Integer, nullable=False),
"chain_seq": "Sequence of the polymer.", "chain_seq_md5": "MD5 Hash of the chain sequence.", "rotation": "Rotation matrix.", "translation": "Translation vector.", "is_at_identity": "True if the chain is at identity, i.e. no transformation was performed.", "has_disordered_regions": "True if the chain contains at least one disordered region (unobserved residues)." } } comment_on_table_elements(chains, chain_comments) ## chain subsets: polypeptides, oligonucleotides and polysaccharides # polypeptides are chains where the type in mmCIF is polypeptide polypeptides = Table('polypeptides', metadata, Column('chain_id', Integer, nullable=False), Column('is_wildtype', Boolean(create_constraint=False), DefaultClause('false'), nullable=False), Column('is_human', Boolean(create_constraint=False), DefaultClause('false'), nullable=False),
"is_acceptor": "", "is_aromatic": "", "is_weak_acceptor": "", "is_weak_donor": "", "is_hydrophobe": "", "is_metal": "", "is_pos_ionisable": "", "is_neg_ionisable": "", "is_xbond_donor": "", "is_xbond_acceptor": "", "is_carbonyl_oxygen": "", "is_carbonyl_carbon": "" } } comment_on_table_elements(atoms, comments) create_partition_insert_trigger(atoms, ATOMS_PARTITION_SIZE) # create new paritions for every X biomolecules partitions = range(0, CURRENT_BIOMOL_MAX + ATOMS_PARTITION_SIZE, ATOMS_PARTITION_SIZE) for part_bound_low, part_bound_high in zip(partitions[:-1], partitions[1:]): tablename = 'atoms_biomol_le_{0}'.format(part_bound_high) rulename = tablename + '_insert' partition = Table( tablename, metadata, Column('atom_id', Integer, primary_key=True), Column('biomolecule_id', Integer, nullable=False),
from credovi.util.sqlalchemy import comment_on_table_elements

# Cross-reference table: one row links an entity (identified by its type and
# id) to an identifier in a named external source.
xrefs = Table(
    'xrefs',
    metadata,
    Column('xref_id', Integer, nullable=False),
    Column('entity_type', String(12), nullable=False),
    Column('entity_id', Integer, nullable=False),
    Column('source', String(32), nullable=False),
    Column('xref', String(64), nullable=False),
    Column('description', Text),
    schema=schema,
)

# Primary key on xref_id, declared DEFERRABLE INITIALLY DEFERRED.
PrimaryKeyConstraint(
    xrefs.c.xref_id,
    deferrable=True,
    initially='deferred',
)

# Composite index over (entity_type, entity_id).
Index(
    'idx_xrefs_entity_type',
    xrefs.c.entity_type,
    xrefs.c.entity_id,
)

# Composite index over (source, xref).
Index(
    'idx_xrefs_xref',
    xrefs.c.source,
    xrefs.c.xref,
)

# Human-readable descriptions of the table and of each of its columns.
comments = {
    "table": "Contains cross references between entities in CREDO and external databases.",
    "columns": {
        "xref_id": "Primary key of the cross reference.",
        "entity_id": "Primary key of the CREDO entity.",
        "entity_type": "Type of the CREDO entity, e.g. Ligand.",
        "source": "Name of the external database.",
        "xref": "cross reference in the external database.",
        "description": "Further description of the cross reference."
    }
}

# Hand the descriptions to the project helper together with the table object.
# NOTE(review): presumably this registers them as database COMMENTs -- confirm
# against the helper's implementation.
comment_on_table_elements(xrefs, comments)
"groove_id": "", "biomolecule_id": "Primary key of the parent biomolecule.", "chain_prot_id": "Primary key of the polypeptide chain.", "chain_nuc_id": "Primary key of the oligonucleotide chain.", "path": "ptree path in the form PDB/assembly serial number/G:PDB chain prot ID-PDB chain nuc ID", "num_res_prot": "Number of polypetide residues that are interacting.", "num_res_nuc": "Number of oligonucleotide residues that are interacting.", "nucleic_acid_type": "Type of the nucleic acid, either DNA/RNA or hybrid.", "has_missing_atoms": "True if at least one residue has missing atoms." } } comment_on_table_elements(grooves, groove_comments) groove_residue_pairs = Table('groove_residue_pairs', metadata, Column('groove_id', Integer, autoincrement=False, primary_key=True), Column('residue_prot_id', Integer, autoincrement=False, primary_key=True), Column('residue_nuc_id', Integer, autoincrement=False, primary_key=True),