예제 #1
0
def create_vocabulary_table_if_not_exist(model, schema_name, table_name,
                                         comment):
    """Create a vocabulary table in a schema unless one already exists.

    Args:
        model: Catalog model object exposing a ``schemas`` mapping.
        schema_name: Name of the schema that should hold the table.
        table_name: Name of the vocabulary table to create.
        comment: Comment passed through to the table definition.

    Raises:
        KeyError: If ``schema_name`` is not present in ``model.schemas``.
    """
    target_schema = model.schemas[schema_name]

    # Nothing to do when the table is already part of the schema.
    if table_name in target_schema.tables:
        return

    vocab_def = Table.define_vocabulary(table_name, 'PDB:{RID}',
                                        comment=comment)
    target_schema.create_table(vocab_def)
    print('Created table {}:{}'.format(schema_name, table_name))
예제 #2
0
 def create_species_table(self):
     """Create the species vocabulary table, then drop its "Alternate_IDs" column.

     The table is created in ``self.model.schemas[self.species_schema]`` with
     a CURIE template built from ``self.curie_prefix`` and a key on "Name".
     """
     species_schema = self.model.schemas[self.species_schema]
     curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
     species_def = Table.define_vocabulary(
         self.species_table,
         curie_template,
         key_defs=[Key.define(['Name'])],
         comment="Species")
     created = species_schema.create_table(species_def)

     # The "Alternate_IDs" column is removed right after creation.
     created.columns["Alternate_IDs"].drop()
예제 #3
0
 def create_gene_type_table(self):
     """Create the gene-type vocabulary table.

     The table is created in ``self.model.schemas[self.gene_type_schema]``
     with a key on "Name", a CURIE template built from ``self.curie_prefix``
     and a URI template built from ``self.host``.
     """
     gene_type_schema = self.model.schemas[self.gene_type_schema]
     curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
     name_key = Key.define(['Name'])
     uri_template = 'https://{host}/id/{{RID}}'.format(host=self.host)
     gene_type_def = Table.define_vocabulary(
         self.gene_type_table,
         curie_template,
         key_defs=[name_key],
         uri_template=uri_template,
         comment="Gene types")
     gene_type_schema.create_table(gene_type_def)
    def create_vocabulary_tables(self):
        """Define the demo vocabulary tables and hand each to ``try_create_table``.

        Every table uses the "deriva-demo:{RID}" CURIE template and
        ``provide_system=True``. All tables except "Anatomy" also declare a
        key on "Name". Tables are processed in the order listed below.
        """
        vocab_schema = self.model.schemas[self.VOCABULARY]

        # (table name, table comment, declare a key on "Name"?)
        vocab_specs = (
            ("Species", "Species", True),
            ("Stage",
             "Developmental Stage (e.g., Theiler stage, Carnegie stage)",
             True),
            ("Sex", "Sex", True),
            ("Assay_Type", "Assay type (e.g., mRNA-Seq, scRNA-Seq, ISH", True),
            ("Anatomy", "Anatomical Region", False),
            ("Molecule_Type", "Type of molecule (e.g., DNA, RNA)", True),
            ("File_Type", "File type", True),
        )

        for vocab_name, vocab_comment, keyed_on_name in vocab_specs:
            options = {"provide_system": True}
            if keyed_on_name:
                # Built per table so each definition gets its own key object.
                options["key_defs"] = [Key.define(['Name'])]
            options["comment"] = vocab_comment
            vocab_def = Table.define_vocabulary(vocab_name,
                                                "deriva-demo:{RID}",
                                                **options)
            self.try_create_table(vocab_schema, vocab_def)
예제 #5
0
 def create_ontology_table(self):
     """Create the ontology vocabulary table and add an "Ontology_Home" column.

     The table is created in ``self.model.schemas[self.ontology_schema]``
     with a key on "Name", a CURIE template built from ``self.curie_prefix``
     and a URI template built from ``self.host``. A nullable text column
     "Ontology_Home" is added once the table exists.
     """
     ontology_schema = self.model.schemas[self.ontology_schema]
     curie_template = '{prefix}:{{RID}}'.format(prefix=self.curie_prefix)
     name_key = Key.define(['Name'])
     uri_template = 'https://{host}/id/{{RID}}'.format(host=self.host)
     ontology_def = Table.define_vocabulary(
         self.ontology_table,
         curie_template,
         key_defs=[name_key],
         uri_template=uri_template,
         comment="Ontologies")
     ontology_table = ontology_schema.create_table(ontology_def)

     home_column = Column.define("Ontology_Home",
                                 builtin_types.text,
                                 nullok=True,
                                 comment="Home page for this ontology")
     ontology_table.create_column(home_column)
예제 #6
0
    def create_gene_table(self, extra_boolean_cols=None):
        """Create the gene vocabulary table in ``self.gene_schema``.

        The table gets the common gene columns (Gene_Type, Species,
        Chromosome, Location, Source_Date), one boolean column per entry in
        ``extra_boolean_cols``, and foreign keys to the gene-type, species
        and chromosome tables.

        Args:
            extra_boolean_cols: Optional iterable of column names; a boolean
                column is added for each. ``None`` (the default) adds none.

        Raises:
            KeyError: If ``self.gene_schema`` is not in ``self.model.schemas``.
        """
        schema = self.model.schemas[self.gene_schema]

        column_defs = [
            Column.define("Gene_Type", builtin_types.text, nullok=False),
            Column.define("Species", builtin_types.text, nullok=False),
            Column.define("Chromosome", builtin_types.text),
            Column.define("Location",
                          builtin_types.text,
                          comment="Location on chromosome"),
            Column.define(
                "Source_Date",
                builtin_types.date,
                comment="Last-updated date reported by the gene data source"),
        ]
        # ``None`` replaces the former mutable ``[]`` default.
        column_defs.extend(
            Column.define(colname, builtin_types.boolean)
            for colname in (extra_boolean_cols or ()))

        fkey_defs = [
            # Both vocabulary references resolve the referenced column name
            # through the same (schema, table, columns) helper call.
            ForeignKey.define(["Gene_Type"], self.gene_type_schema,
                              self.gene_type_table,
                              self.adjust_fkey_columns_case(
                                  self.gene_type_schema, self.gene_type_table,
                                  ["ID"])),
            ForeignKey.define(["Species"], self.species_schema,
                              self.species_table,
                              self.adjust_fkey_columns_case(
                                  self.species_schema, self.species_table,
                                  ["ID"])),
            ForeignKey.define(["Chromosome"], self.chromosome_schema,
                              self.chromosome_table, ["RID"]),
        ]

        # NOTE(review): no "NCBI_GeneID" column is defined above, and this is
        # a bare column list rather than a Key.define(...) result -- confirm
        # this is the key definition the catalog expects.
        key_defs = [["NCBI_GeneID"]]

        schema.create_table(
            Table.define_vocabulary(
                self.gene_table,
                '{prefix}:{{RID}}'.format(prefix=self.curie_prefix),
                # Previously referenced an undefined name (NameError).
                column_defs=column_defs,
                key_defs=key_defs,
                fkey_defs=fkey_defs,
                comment="Genes"))
def replace_vocab_table(schema_name,
                        old_table_name,
                        new_table_name,
                        replace_if_exists=False):
    """Replaces old vocab table with new and remaps all foreign keys from old to new.

    Steps, in order:
      1. Optionally drop an already existing table named ``new_table_name``.
      2. Define the new vocabulary table (with a temporary ``dbxref`` column)
         and create it.
      3. Read the terms of the old table, pass them through ``clean_term`` and
         insert them into the new table.
      4. For every foreign key that references the old table's ``dbxref``
         column: delete it, rewrite the referring column values from the old
         dbxref to the new term id, and create an equivalent foreign key that
         references the new table's ``id`` column.
      5. Drop the temporary ``dbxref`` column from the new table.

    Relies on names defined elsewhere in the module: ``model``, ``catalog``,
    ``args`` (``dryrun``, ``altids``, ``curie_template``, ``max_update``),
    ``verbose``, ``vverbose``, ``clean_term`` and ``fkey_blacklist_pattern``.
    When ``args.dryrun`` is set, no create/insert/update/delete call on the
    catalog is made by this function; data is still read.

    Args:
        schema_name: Schema holding both the old and the new vocabulary table.
        old_table_name: Name of the vocabulary table being replaced.
        new_table_name: Name of the vocabulary table to create.
        replace_if_exists: If the new table already exists, drop and recreate
            it when true; otherwise return without doing anything.
    """
    schema = model.schemas[schema_name]

    # Drop new_vocab table if exists (optional)
    # The existence check is skipped entirely in dry-run mode.
    if not args.dryrun and new_table_name in schema.tables:
        if replace_if_exists:
            verbose("Found {tname}. Dropping...".format(tname=new_table_name))
            schema.tables[new_table_name].delete(catalog, schema)
        else:
            verbose("Found {tname}. Skipping...".format(tname=new_table_name))
            return

    # Define and create new vocab table
    # 'dbxref' carries the legacy identifier so old references can be mapped
    # to new ids; it is dropped again at the end of this function.
    extra_cols = [
        Column.define('dbxref',
                      builtin_types['text'],
                      comment='Legacy database external reference (dbxref).')
    ]
    if args.altids:
        # Prepend the optional array column for alternate identifiers.
        extra_cols = [
            Column.define('alternate_ids',
                          builtin_types['text[]'],
                          comment='Alternate identifiers for this term.')
        ] + extra_cols
    vocab_table_def = Table.define_vocabulary(
        new_table_name,
        args.curie_template,
        uri_template='https://www.facebase.org/id/{RID}',
        column_defs=extra_cols)
    if not args.dryrun:
        # 'new_table' is only bound outside dry-run; its single later use is
        # behind the same guard.
        new_table = schema.create_table(catalog, vocab_table_def)

    # Populate new vocab table
    datapaths = catalog.getPathBuilder()

    old_table_path = datapaths.schemas[schema_name].tables[old_table_name]
    # Keyword name -> old-table column. Presumably the keyword names become
    # the keys of the returned rows (old 'definition' -> 'description') --
    # confirm against the datapath API.
    kwargs = {
        'name': old_table_path.column_definitions['name'],
        'description': old_table_path.column_definitions['definition'],
        'synonyms': old_table_path.column_definitions['synonyms'],
        'dbxref': old_table_path.column_definitions['dbxref']
    }
    if args.altids:
        # NOTE(review): the new table's column is named 'alternate_ids' while
        # this key is 'alternate_dbxrefs'; presumably clean_term renames it --
        # confirm.
        kwargs['alternate_dbxrefs'] = old_table_path.column_definitions[
            'alternate_dbxrefs']

    # clean_term is defined elsewhere; the code below reads 'id' and 'dbxref'
    # from each cleaned term.
    cleaned_terms = [
        clean_term(term) for term in old_table_path.entities(**kwargs)
    ]

    vverbose('Cleaned terms ready for insert into {tname}:'.format(
        tname=new_table_name))
    vverbose(list(cleaned_terms))

    # Create separate batches for insertion w/ defaults
    # Terms with a non-empty 'id' keep it; the rest are inserted with 'id'
    # listed in 'defaults' as well (presumably so the server fills it in --
    # confirm).
    terms_w_ids = [
        term for term in cleaned_terms if term['id'] and len(term['id'])
    ]
    terms_w_no_ids = [
        term for term in cleaned_terms if not term['id'] or not len(term['id'])
    ]

    if not args.dryrun:
        new_table_path = datapaths.schemas[schema_name].tables[new_table_name]
        new_terms = list(new_table_path.insert(terms_w_ids, defaults=['uri']))
        new_terms += list(
            new_table_path.insert(terms_w_no_ids, defaults=['id', 'uri']))
        vverbose('New terms returned after insert into {tname}:'.format(
            tname=new_table_name))
        vverbose(list(new_terms))
    else:
        # This allows for best effort dryrun testing, though the local term CURIEs will be faked
        # The fake id is the dbxref text before its last ':' in upper case;
        # str.rindex raises ValueError when the dbxref contains no ':'.
        new_terms = cleaned_terms
        for term in new_terms:
            if not term['id']:
                term['id'] = term['dbxref'][:term['dbxref'].rindex(':')].upper(
                )

    # Create mapping of old dbxref to new id
    dbxref_to_id = {term['dbxref']: term['id'] for term in new_terms}

    # Find all references to old vocab table dbxref
    old_table = schema.tables[old_table_name]
    for fkey in old_table.referenced_by:

        # fkey.names[0] is indexed as a (schema name, constraint name) pair;
        # the constraint name is tested against the blacklist pattern.
        if fkey_blacklist_pattern.match(fkey.names[0][1]):
            verbose('Skipping foreign key "{sname}:{cname}"'.format(
                sname=fkey.names[0][0], cname=fkey.names[0][1]))
            continue  # skip fkeys from vocab to vocab

        # Referenced and referring columns are walked in parallel by index i.
        for i in range(len(fkey.referenced_columns)):
            # Get referenced column
            refcol = fkey.referenced_columns[i]

            # See if it references the dbxref of the old vocab table, if not skip
            if (refcol['schema_name'] != schema_name
                    or refcol['table_name'] != old_table_name
                    or refcol['column_name'] != 'dbxref'):
                continue

            # Get the corresponding referring table and its fkey column
            fkeycol = fkey.foreign_key_columns[i]
            reftable = model.schemas[fkeycol['schema_name']].tables[
                fkeycol['table_name']]
            verbose(
                'Found reference to "dbxref" from "{sname}:{tname}:{cname}"'.
                format(sname=fkeycol['schema_name'],
                       tname=fkeycol['table_name'],
                       cname=fkeycol['column_name']))

            # Delete the fkey
            # Deleted before the values are rewritten, and recreated against
            # the new table afterwards.
            if not args.dryrun:
                verbose('Deleting foreign key "{sname}:{cname}"'.format(
                    sname=fkey.names[0][0], cname=fkey.names[0][1]))
                fkey.delete(catalog, reftable)

            # Fix fkey column value
            # Only RID and the referring column are fetched.
            verbose('Getting existing fkey column values')
            reftable_path = datapaths.schemas[fkeycol['schema_name']].tables[
                fkeycol['table_name']]
            entities = reftable_path.entities(
                reftable_path.RID,
                reftable_path.column_definitions[fkeycol['column_name']])

            # Map the old dbxref value to the new curie id value for the reference
            # Empty values are left untouched; a non-empty value missing from
            # dbxref_to_id raises KeyError.
            verbose('Remapping {count} fkey column values'.format(
                count=len(entities)))
            for entity in entities:
                if entity[fkeycol['column_name']]:
                    entity[fkeycol['column_name']] = dbxref_to_id[entity[
                        fkeycol['column_name']]]
            vverbose(list(entities))

            # Update referring table
            # Rows are sent in consecutive slices of at most args.max_update.
            if not args.dryrun:
                verbose(
                    'Updating fkey column values, {max_up} at a time'.format(
                        max_up=args.max_update))
                slice_ct = 0
                slice_sz = args.max_update
                updated = []
                while (slice_ct * slice_sz) < len(entities):
                    data = entities[(slice_ct * slice_sz):((1 + slice_ct) *
                                                           slice_sz)]
                    reftable_path.update(data,
                                         targets=[fkeycol['column_name']])
                    updated.extend(data)
                    slice_ct += 1
                # Counts the rows submitted, not rows confirmed by the server.
                if len(updated) != len(entities):
                    print(
                        'WARNING: only updated {up_count} of {ent_count} entities!'
                        .format(up_count=len(updated),
                                ent_count=len(entities)))

            # Define new fkey
            verbose(
                'Defining and creating new foreign key reference to new vocab table'
            )
            # Mutates the in-memory fkey so the column list built below
            # references 'id' instead of 'dbxref'.
            fkey.referenced_columns[i]['column_name'] = 'id'
            # All other properties of the old foreign key are carried over,
            # with explicit fallbacks where the old value is falsy.
            new_fkey = ForeignKey.define(
                [
                    fkey.foreign_key_columns[j]['column_name']
                    for j in range(len(fkey.foreign_key_columns))
                ],
                schema_name,
                new_table_name, [
                    fkey.referenced_columns[k]['column_name']
                    for k in range(len(fkey.referenced_columns))
                ],
                on_update=fkey.on_update or 'NO ACTION',
                on_delete=fkey.on_delete or 'NO ACTION',
                constraint_names=fkey.names or [],
                comment=fkey.comment or None,
                acls=fkey.acls or {},
                acl_bindings=fkey.acl_bindings or {},
                annotations=fkey.annotations or {})
            vverbose(new_fkey)
            if not args.dryrun:
                reftable.create_fkey(catalog, new_fkey)

    # The temporary 'dbxref' column is removed once all references have been
    # processed.
    if not args.dryrun:
        verbose('Dropping "dbxref" column from new vocab table')
        dbxref = new_table.column_definitions['dbxref']
        dbxref.delete(catalog, new_table)