Ejemplo n.º 1
0
def ForeignKey(column, **params):
    column_params = {}
    for name in ('name', 'type_', 'default', 'doc', 'index', 'info', 'nullable', 'unique', 'primary_key'):
        if name in params:
            column_params[name] = params.pop(name)

    return Column(_ForeignKey(column, **params), **column_params)
Ejemplo n.º 2
0
Archivo: fields.py Proyecto: siq/spire
def ForeignKey(column, constraint_name=None, **params):
    column_params = {}
    for name in ("name", "type_", "default", "doc", "index", "info", "nullable", "unique", "primary_key"):
        if name in params:
            column_params[name] = params.pop(name)

    if constraint_name:
        params["name"] = constraint_name

    return Column(_ForeignKey(column, **params), **column_params)
Ejemplo n.º 3
0
def ForeignKey(column, constraint_name=None, **params):
    column_params = {}
    for name in ('name', 'type_', 'default', 'doc', 'index', 'info',
                 'nullable', 'unique', 'primary_key'):
        if name in params:
            column_params[name] = params.pop(name)

    if constraint_name:
        params['name'] = constraint_name

    return Column(_ForeignKey(column, **params), **column_params)
Ejemplo n.º 4
0
class SNP(Base):
    """An SQLAlchemy Talble for GRASP SNPs.

    Study and phenotype information are pushed to other tables to minimize
    table size and make querying easier.

    Table Name:
        snps

    Columns:
        Described in the columns attribute

    Attributes:
        int:      The ID number of the SNP, usually the NHLBIkey
        str:      SNP loction expressed as 'chr:pos'
        hvgs_ids: A list of HGVS IDs for this SNP
        columns:  A dictionary of all columns 'column_name'=>('type', 'desc')
    """

    __tablename__ = "snps"

    id = _Column(_BigInteger, primary_key=True, index=True)
    snpid = _Column(_String, index=True)
    chrom = _Column(_String(10), index=True)
    pos = _Column(_Integer, index=True)
    pval = _Column(_Float, index=True)
    NHLBIkey = _Column(_String, index=True)
    HUPfield = _Column(_String)
    LastCurationDate = _Column(_Date)
    CreationDate = _Column(_Date)
    population_id = _Column(_Integer,
                            _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="snps")
    study_id = _Column(_Integer, _ForeignKey('studies.id'), index=True)
    study = _relationship("Study", back_populates="snps")
    study_snpid = _Column(_String, index=True)
    paper_loc = _Column(_String)
    phenotype_desc = _Column(_String, index=True)
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=snp_pheno_assoc,
                                   back_populates="snps")
    InGene = _Column(_String, index=True)
    NearestGene = _Column(_String, index=True)
    InLincRNA = _Column(_String)
    InMiRNA = _Column(_String)
    InMiRNABS = _Column(_String)
    dbSNPfxn = _Column(_String)
    dbSNPMAF = _Column(_String)
    dbSNPinfo = _Column(_String)
    dbSNPvalidation = _Column(_String)
    dbSNPClinStatus = _Column(_String)
    ORegAnno = _Column(_String)
    ConservPredTFBS = _Column(_String)
    HumanEnhancer = _Column(_String)
    RNAedit = _Column(_String)
    PolyPhen2 = _Column(_String)
    SIFT = _Column(_String)
    LSSNP = _Column(_String)
    UniProt = _Column(_String)
    EqtlMethMetabStudy = _Column(_String)

    _Index('chrom_pos', 'chrom', 'pos')

    columns = _od([
        ('id', ('BigInteger', 'NHLBIkey')),
        ('snpid', ('String', 'SNPid')),
        ('chrom', ('String', 'chr')),
        ('pos', ('Integer', 'pos')),
        ('pval', ('Float', 'Pvalue')),
        ('NHLBIkey', ('String', 'NHLBIkey')),
        ('HUPfield', ('String', 'HUPfield')),
        ('LastCurationDate', ('Date', 'LastCurationDate')),
        ('CreationDate', ('Date', 'CreationDate')),
        ('population_id', ('Integer', 'Primary')),
        ('population', ('relationship', 'Link')),
        ('study_id', ('Integer', 'Primary')),
        ('study', ('relationship', 'Link')),
        ('study_snpid', ('String', 'SNPid')),
        ('paper_loc', ('String', 'LocationWithinPaper')),
        ('phenotype_desc', ('String', 'Phenotype')),
        ('phenotype_cats', ('relationship', 'Link')),
        ('InGene', ('String', 'InGene')),
        ('NearestGene', ('String', 'NearestGene')),
        ('InLincRNA', ('String', 'InLincRNA')),
        ('InMiRNA', ('String', 'InMiRNA')),
        ('InMiRNABS', ('String', 'InMiRNABS')),
        ('dbSNPfxn', ('String', 'dbSNPfxn')),
        ('dbSNPMAF', ('String', 'dbSNPMAF')),
        ('dbSNPinfo', ('String', 'dbSNPalleles')),
        ('dbSNPvalidation', ('String', 'dbSNPvalidation')),
        ('dbSNPClinStatus', ('String', 'dbSNPClinStatus')),
        ('ORegAnno', ('String', 'ORegAnno')),
        ('ConservPredTFBS', ('String', 'ConservPredTFBS')),
        ('HumanEnhancer', ('String', 'HumanEnhancer')),
        ('RNAedit', ('String', 'RNAedit')),
        ('PolyPhen2', ('String', 'PolyPhen2')),
        ('SIFT', ('String', 'SIFT')),
        ('LSSNP', ('String', 'LS')),
        ('UniProt', ('String', 'UniProt')),
        ('EqtlMethMetabStudy', ('String', 'EqtlMethMetabStudy')),
    ])
    """A description of all columns in this table."""
    @property
    def snp_loc(self):
        """Return a simple string containing the SNP location."""
        return "chr{}:{}".format(self.chrom, self.pos)

    @property
    def hvgs_ids(self):
        """The HVGS ID from myvariant."""
        if not hasattr(self, '_hvgs_ids'):
            mv = myvariant.MyVariantInfo()
            self._hvgs_ids = [
                i['_id'] for i in mv.query(self.snp_loc, fields='id')['hits']
            ]
        return self._hvgs_ids

    def get_variant_info(self, fields="dbsnp", pandas=True):
        """Use the myvariant API to get info about this SNP.

        Note that this service can be very slow.
        It will be faster to query multiple SNPs.

        Args:
            fields: Choose fields to display from:
                    `docs.myvariant.info/en/latest/doc/data.html#available-fields`_
                    Good choices are 'dbsnp', 'clinvar', or 'gwassnps'
                    Can also use 'grasp' to get a different version of this
                    info.
            pandas: Return a dataframe instead of dictionary.

        Returns:
            A dictionary or a dataframe.
        """
        mv = myvariant.MyVariantInfo()
        return mv.getvariants(self.hvgs_ids,
                              fields=fields,
                              as_dataframe=pandas,
                              df_index=True)

    def get_columns(self, return_as='list'):
        """Return all columns in the table nicely formatted.

        Display choices:
            list:       A python list of column names
            dictionary: A python dictionary of name=>desc
            long_dict:  A python dictionary of name=>(type, desc)

        Args:
            return_as:  {table,tab,list,dictionary,long_dict,id_dict}

        Returns:
            A list or dictionary
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table:      A formatted grid-like table
            tab:        A tab delimited non-formatted version of table
            list:       A string list of column names

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +\
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +\
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display information about the table."""
        return "{} ({}) <{}:{} pheno: {} study: {}".format(
            self.id, self.snpid, self.chrom, self.pos, self.phenotype_desc,
            self.study.title)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __str__(self):
        """Return coordinates."""
        return self.snp_loc
Ejemplo n.º 5
0
class Study(Base):
    """An SQLAlchemy table to store study information.

    This table provides easy ways to query for SNPs by study information,
    including population and phenotype.

    Note: `disc_pop_flag` and `rep_pop_flag` are integer representations of
    a bitwise flag describing population, defined in ref.PopFlag. To see the
    string representation of this property, lookup `disc_pops` or `rep_pops`.

    Table Name:
        studies

    Columns:
        Described in the columns attribute.

    Attributes:
        int:       The integer ID number, usually the PMID, unless not indexed.
        str:       Summary data on this study.
        len:       The number of individuals in this study.
        disc_pops: A string displaying the number of discovery poplations.
        rep_pops:  A string displaying the number of replication poplations.
        columns:   A dictionary of all columns 'column_name'=>('type', 'desc')

        population_information:
            A multi-line string describing the populations in this study.

    """

    __tablename__ = "studies"

    id = _Column(_Integer, primary_key=True, index=True)
    pmid = _Column(_String(100), index=True)
    title = _Column(_String, index=True)
    journal = _Column(_String)
    author = _Column(_String)
    grasp_ver = _Column(_Integer, index=True)
    noresults = _Column(_Boolean)
    results = _Column(_Integer)
    qtl = _Column(_Boolean)
    snps = _relationship("SNP", back_populates='study')
    phenotype_id = _Column(_Integer, _ForeignKey('phenos.id'), index=True)
    phenotype = _relationship("Phenotype", back_populates="studies")
    phenotype_cats = _relationship("PhenoCats",
                                   secondary=study_pheno_assoc,
                                   back_populates="studies")
    datepub = _Column(_Date)
    in_nhgri = _Column(_Boolean)
    locations = _Column(_String)
    mf = _Column(_Boolean)
    mf_only = _Column(_Boolean)
    platforms = _relationship("Platform",
                              secondary=study_plat_assoc,
                              back_populates="studies")
    snp_count = _Column(_String)
    imputed = _Column(_Boolean)
    population_id = _Column(_Integer,
                            _ForeignKey('populations.id'),
                            index=True)
    population = _relationship("Population", backref="studies")
    total = _Column(_Integer)
    total_disc = _Column(_Integer)
    pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    disc_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    european = _Column(_Integer)
    african = _Column(_Integer)
    east_asian = _Column(_Integer)
    south_asian = _Column(_Integer)
    hispanic = _Column(_Integer)
    native = _Column(_Integer)
    micronesian = _Column(_Integer)
    arab = _Column(_Integer)
    mixed = _Column(_Integer)
    unspecified = _Column(_Integer)
    filipino = _Column(_Integer)
    indonesian = _Column(_Integer)
    total_rep = _Column(_Integer)
    rep_pop_flag = _Column(_Integer, index=True)  # Will hold a bitwise flag
    rep_european = _Column(_Integer)
    rep_african = _Column(_Integer)
    rep_east_asian = _Column(_Integer)
    rep_south_asian = _Column(_Integer)
    rep_hispanic = _Column(_Integer)
    rep_native = _Column(_Integer)
    rep_micronesian = _Column(_Integer)
    rep_arab = _Column(_Integer)
    rep_mixed = _Column(_Integer)
    rep_unspecified = _Column(_Integer)
    rep_filipino = _Column(_Integer)
    rep_indonesian = _Column(_Integer)
    sample_size = _Column(_String)  # Maybe parse this better
    replication_size = _Column(_String)  # Maybe parse this better

    columns = _od([
        ('id', ('Integer', 'id')),
        ('pmid', ('String', 'PubmedID')),
        ('title', ('String', 'Study')),
        ('journal', ('String', 'Journal')),
        ('author', ('String', '1st_author')),
        ('grasp_ver', ('Integer', 'GRASPversion?')),
        ('noresults', ('Boolean', 'No results flag')),
        ('results', ('Integer', '#results')),
        ('qtl', ('Boolean', 'IsEqtl/meQTL/pQTL/gQTL/Metabolmics?')),
        ('snps', ('relationship', 'Link to all SNPs in this study')),
        ('phenotype_id', ('Integer',
                          'ID of primary phenotype in Phenotype table')),
        ('phenotype',
         ('relationship',
          'A link to the primary phenotype in the Phenotype table')),
        ('phenotype_cats',
         ('relationship',
          'A link to all phenotype categories assigned in the PhenoCats table')
         ),
        ('datepub', ('Date', 'DatePub')),
        ('in_nhgri', ('Boolean', 'In NHGRI GWAS catalog (8/26/14)?')),
        ('locations', ('String', 'Specific place(s) mentioned for samples')),
        ('mf',
         ('Boolean',
          'Includes male/female only analyses in discovery and/or replication?'
          )),
        ('mf_only', ('Boolean', 'Exclusively male or female study?')),
        ('platforms',
         ('relationship',
          'Link to platforms in the Platform table. Platform [SNPs passing QC]'
          )),
        ('snp_count', ('String', 'From "Platform [SNPs passing QC]"')),
        ('imputed', ('Boolean', 'From "Platform [SNPs passing QC]"')),
        ('population_id', ('Integer', 'Primary key of population table')),
        ('population', ('relationship', 'GWAS description, link to table')),
        ('total', ('Integer', 'Total Discovery + Replication sample size')),
        ('total_disc', ('Integer', 'Total discovery samples')),
        ('pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of all populations (discovery and replication)'
          )),
        ('disc_pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of discovery populations'
          )),
        ('european', ('Integer', 'European')),
        ('african', ('Integer', 'African ancestry')),
        ('east_asian', ('Integer', 'East Asian')),
        ('south_asian', ('Integer', 'Indian/South Asian')),
        ('hispanic', ('Integer', 'Hispanic')),
        ('native', ('Integer', 'Native')),
        ('micronesian', ('Integer', 'Micronesian')),
        ('arab', ('Integer', 'Arab/ME')),
        ('mixed', ('Integer', 'Mixed')),
        ('unpecified', ('Integer', 'Unspec')),
        ('filipino', ('Integer', 'Filipino')),
        ('indonesian', ('Integer', 'Indonesian')),
        ('total_rep', ('Integer', 'Total replication samples')),
        ('rep_pop_flag',
         ('Integer',
          'A bitwise flag that shows presence/absence of replication populations'
          )),
        ('rep_european', ('Integer', 'European.1')),
        ('rep_african', ('Integer', 'African ancestry.1')),
        ('rep_east_asian', ('Integer', 'East Asian.1')),
        ('rep_south_asian', ('Integer', 'Indian/South Asian.1')),
        ('rep_hispanic', ('Integer', 'Hispanic.1')),
        ('rep_native', ('Integer', 'Native.1')),
        ('rep_micronesian', ('Integer', 'Micronesian.1')),
        ('rep_arab', ('Integer', 'Arab/ME.1')),
        ('rep_mixed', ('Integer', 'Mixed.1')),
        ('rep_unpecified', ('Integer', 'Unspec.1')),
        ('rep_filipino', ('Integer', 'Filipino.1')),
        ('rep_indonesian', ('Integer', 'Indonesian.1')),
        ('sample_size',
         ('String',
          'Initial Sample Size, string description of integer population counts above.'
          )),
        ('replication_size',
         ('String',
          'Replication Sample Size, string description of integer population counts above.'
          )),
    ])
    """A description of all columns in this table."""
    @property
    def disc_pops(self):
        """Convert disc_pop_flag to PopFlag."""
        return _PopFlag(self.disc_pop_flag)

    @property
    def rep_pops(self):
        """Convert rep_pop_flag to PopFlag."""
        return _PopFlag(self.rep_pop_flag)

    @property
    def pops(self):
        """Convert rep_pop_flag to PopFlag."""
        return _PopFlag(self.pop_flag)

    @property
    def population_information(self):
        """Display a summary of population data."""
        outstr = [
            "Primary population: {}\n".format(self.population.population),
            "Individuals: {}\n".format(self.total),
            "Discovery populations: {}; Total: {}\n".format(
                self.disc_pops.to_simple_str(), self.total_disc)
        ]
        for pop in [
                'european', 'african', 'east_asian', 'south_asian', 'hispanic',
                'native', 'micronesian', 'arab', 'unspecified', 'filipino',
                'indonesian'
        ]:
            outstr.append('\t{}: {}\n'.format(pop, eval('self.' + pop)))

        outstr.append("Replication populations: {}; Total: {}\n".format(
            self.rep_pops.to_simple_str(), self.total_rep))
        for pop in [
                'european', 'african', 'east_asian', 'south_asian', 'hispanic',
                'native', 'micronesian', 'arab', 'unspecified', 'filipino',
                'indonesian'
        ]:
            outstr.append('\t{}: {}\n'.format(pop, eval('self.rep_' + pop)))

    def get_columns(self, return_as='list'):
        """Return all columns in the table nicely formatted.

        Display choices:
            list:       A python list of column names
            dictionary: A python dictionary of name=>desc
            long_dict:  A python dictionary of name=>(type, desc)

        Args:
            return_as:  {table,tab,list,dictionary,long_dict,id_dict}

        Returns:
            A list or dictionary
        """
        cols = self.columns
        if return_as == 'list':
            return [i[1] for i in cols.values()]
        elif return_as == 'dictionary':
            return {k: v[1] for k, v in cols.items()}
        elif return_as == 'long_dict':
            return cols
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")

    def display_columns(self, display_as='table', write=False):
        """Return all columns in the table nicely formatted.

        Display choices:
            table:      A formatted grid-like table
            tab:        A tab delimited non-formatted version of table
            list:       A string list of column names

        Args:
            display_as: {table,tab,list}
            write:      If true, print output to console, otherwise return
                        string.

        Returns:
            A formatted string or None
        """
        cols = self.columns
        if display_as == 'table':
            out = _tb(
                [['Column', 'Description', 'Type']] +\
                [[k, v[1], v[0]] for k, v in cols.items()],
                headers='firstrow', tablefmt='grid'
            )
        elif display_as == 'tab':
            out = '\n'.join(
                ['\t'.join(['Column', 'Description', 'Type'])] +\
                ['\t'.join([k, v[1], v[0]]) for k, v in cols.items()],
            )
        elif display_as == 'list':
            out = '\n'.join([i[1] for i in cols.values()])
        else:
            raise Exception("'display_as' must be one of {table,tab,list}")
        if write:
            print(out)
        else:
            return out

    def __repr__(self):
        """Display informaertn about this study."""
        return '{} <{}:{} "{}" ({}; Pop: {}; Disc Pops: {}; Rep Pops: {})>'.\
            format(self.id, self.author, self.journal, self.title,
                   self.phenotype.phenotype, self.population.population,
                   self.disc_pops.to_simple_str(),
                   self.rep_pops.to_simple_str())

    def __str__(self):
        """Display refertnce."""
        return "{}: {} ({})\nSNPS: {}\nInds: {}\n".format(
            self.journal,
            self.title,
            self.author,
            self.snp_count,
            self.total,
        ) + "Disc Pops: {}; Rep Pops: {}; EUR: {}; AFR: {}".format(
            self.disc_pops.to_simple_str, self.rep_pops.to_simple_str,
            self.european, self.african)

    def __int__(self):
        """Return ID number."""
        return self.id

    def __len__(self):
        """Return total individual count."""
        return int(self.total)
Ejemplo n.º 6
0
# Flags
from .ref import PopFlag as _PopFlag

__all__ = ["SNP", "Phenotype", "PhenoCats", "Platform", "Population"]

Base = _declarative_base()
"""The SQLAlchemy Base for the database."""

###############################################################################
#                             Association Tables                              #
###############################################################################

snp_pheno_assoc = _Table(
    'snp_pheno_association', Base.metadata,
    _Column('snp_id', _BigInteger, _ForeignKey('snps.id')),
    _Column('pheno_id', _Integer, _ForeignKey('pheno_cats.id')))

study_pheno_assoc = _Table(
    'study_pheno_association', Base.metadata,
    _Column('study_id', _Integer, _ForeignKey('studies.id')),
    _Column('pheno_id', _Integer, _ForeignKey('pheno_cats.id')))

study_plat_assoc = _Table(
    'study_platform_association', Base.metadata,
    _Column('study_id', _Integer, _ForeignKey('studies.id')),
    _Column('platform_id', _Integer, _ForeignKey('platforms.id')))

###############################################################################
#                               Database Tables                               #
###############################################################################
Ejemplo n.º 7
0
    class RiskAllele(Base):
        """This class holds the SQLAlchemy definition of the risk allele table.

        Always Populated Columns:
            id (int):          A unique ID
            pmid (int):        The pubmed ID of the study
            trait (str):       A string describing the trait of interest
            rsID (str):        The rsid of the SNP
            population (str):  The population the data is for
            risk_allele (str): The risk allele

        Optional Columns:
            chrom (str):      Chromosome string (hg19)
            position (int):   Position on chromosome (hg19)
            A1 (str):         The A1 allele
            A2 (str):         The A2 allele
            OR (float):       The odds ratio for A1
            B (float):        The beta (effect size) for A1
            OR_B (float):     Either the OR or B
            P (float):        p-value of the SNP-trait pair
            N (int):          Number of individuals supporting the SNP
            N_cases (int):    Number of of individuals in the case group
            N_controls (int): Number of of individuals in the control group
            source (str):     Which database this record is from
            grasp_id (int):   The ID of this SNP-trait pair in the GRASP database
            population_info (str): Extra population information
        """
        __tablename__ = 'risk_alleles'

        id = _Column(_Integer, primary_key=True)
        pmid = _Column(_String, _ForeignKey('pmids.pmid'), nullable=False)
        paper = _relationship('PMID', backref='snps')
        trait_id = _Column(_Integer, _ForeignKey('traits.id'), nullable=False)
        trait = _relationship('Trait', backref='snps')
        rsID = _Column(_String, index=True, nullable=False)
        population = _Column(_String, index=True, nullable=False)
        risk_allele = _Column(_String(1), nullable=False)
        chrom = _Column(_String, index=True)
        position = _Column(_Integer, index=True)
        A1 = _Column(_String)
        A2 = _Column(_String)
        OR = _Column(_Float, index=True)
        B = _Column(_Float)
        OR_B = _Column(_Float)
        P = _Column(_Float(precision=128), index=True)
        cases_MAF = _Column(_Float)
        controls_MAF = _Column(_Float)
        N = _Column(_Integer)
        N_cases = _Column(_Integer)
        N_controls = _Column(_Integer)
        source = _Column(_String)
        grasp_id = _Column(_Integer)
        population_info = _Column(_String)

        pmid_rsid = _Index('pmid_rsid', pmid, rsID)
        loc = _Index('loc', chrom, position)

        # Make pmid, trait, rsID, population, risk_allele unique
        _Unique(pmid, trait_id, rsID, population, risk_allele)

        def __repr__(self):
            """Better display of data."""
            out = (
                "RiskAllele<id={id}, pmid={pmid}, rsID={rsid}, trait={trait}, "
                "risk_allele={risk_allele}>")
            return out.format(id=self.id,
                              pmid=self.pmid,
                              rsid=self.rsID,
                              trait=self.trait.trait,
                              risk_allele=self.risk_allele)