Esempio n. 1
0
class Sample(db.Model):
    """Model for a single sample row.

    A sample carries a unique, indexed ``code``, optional family/gender
    metadata, foreign keys to a population and a data source, and a dynamic
    collection of related ``ArchaicGenomeData`` rows.
    """

    id = db.Column(db.Integer, primary_key=True)
    # Unique, indexed code; this is what ``__repr__`` displays.
    code = db.Column(db.String(32), unique=True, index=True)
    family_code = db.Column(db.String(32), index=True)
    gender = db.Column(db.String(32))
    family_relationship = db.Column(db.String(128))
    comments = db.Column(db.Text)
    population_id = db.Column(db.Integer, db.ForeignKey('population.id'))
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    # lazy='dynamic': the attribute yields a query object, not a loaded list.
    # The backref adds a ``sample`` attribute on ArchaicGenomeData.
    archaic_genome_data = db.relationship(
        'ArchaicGenomeData',
        lazy='dynamic',
        backref='sample',
    )

    def __repr__(self):
        """Return a short debugging label containing the sample code."""
        template = '<Sample {}>'
        return template.format(self.code)
Esempio n. 2
0
class SuperPopulation(db.Model):
    """Model for a group of populations.

    Populations are attached through the ``population_group_table``
    association table; each ``Population`` gets a ``super_populations``
    backref in return.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), unique=True, index=True)
    name = db.Column(db.String(128))
    color = db.Column(db.String(32))
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    populations = db.relationship(
        'Population',
        secondary=population_group_table,
        backref=db.backref('super_populations', lazy=True),
    )

    # TODO Make this a hybrid_property?
    def samples(self):
        """Return the set of samples found in any of the populations."""
        return {
            member
            for group in self.populations
            for member in group.samples
        }

    @property
    def population_count(self):
        """Count the populations linked to this super population."""
        session = db.Session.object_session(self)
        linked = session.query(Population).with_parent(self, "populations")
        return linked.count()

    @property
    def sample_count(self):
        """Count samples joined to the populations of this super population."""
        session = db.Session.object_session(self)
        joined = session.query(Sample).join(Population)
        return joined.with_parent(self, "populations").count()

    def __repr__(self):
        """Return a short debugging label containing the group code."""
        template = '<SuperPopulation {}>'
        return template.format(self.code)
class ArchaicAnalysisRun(db.Model):
    """Model for one archaic analysis run.

    A run has a unique, indexed ``name``, optional description, publication
    DOI and date, a foreign key to a data source, and a dynamic collection
    of ``ArchaicGenomeData`` rows (which receive an ``archaic_analysis_run``
    backref).
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship('ArchaicGenomeData',
                                          backref='archaic_analysis_run',
                                          lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or None if unset."""
        publication_url = None
        if self.publication_doi is not None:
            # Was ``.foramt``, which raised AttributeError for any run that
            # had a DOI set.
            publication_url = "https://doi.org/{}".format(self.publication_doi)
        return publication_url

    @hybrid_property
    def sampleids_with_data_query(self):
        """Return a query of ``Sample.id`` values with data in this run.

        Samples are joined through ``ArchaicGenomeData`` to
        ``ArchaicAnalysisRun`` and filtered on this run's ``id``.
        """
        return db.session.query(Sample.id).join(ArchaicGenomeData).\
            join(ArchaicAnalysisRun).\
            filter(ArchaicAnalysisRun.id == self.id)

    @hybrid_property
    def samples_with_data_query(self):
        """Return a ``Sample`` query limited to samples with data in this run."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Return the number of samples that have data in this run."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """Return the list of samples that have data in this run."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Return a ``Sample`` query for samples with no data in this run."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Return the number of samples that have no data in this run."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """Return the list of samples that have no data in this run."""
        return self.samples_without_data_query.all()

    def __repr__(self):
        """Return a short debugging label containing the run name."""
        return '<ArchaicAnalysisRun {}>'.format(self.name)
Esempio n. 4
0
class ArchaicGenomeData(db.Model):
    """Model for one archaic-genome data row.

    Each row references a sample and an analysis run (both required) and
    stores the archaic genome call, the haplotype number, a BED file value
    and two integer totals.
    """

    id = db.Column(db.Integer, primary_key=True)
    sample_id = db.Column(
        db.Integer, db.ForeignKey('sample.id'), nullable=False)
    archaic_analysis_run_id = db.Column(
        db.Integer,
        db.ForeignKey('archaic_analysis_run.id'),
        nullable=False)
    archaic_genome_call = db.Column(db.String(128), index=True, nullable=False)
    haplotype = db.Column(db.Integer, nullable=False)
    # Unique composite index: at most one row per
    # (sample, run, genome call, haplotype) combination.
    __table_args__ = (
        db.Index(
            'idx_sample_run_genome_haplotype',
            'sample_id',
            'archaic_analysis_run_id',
            'archaic_genome_call',
            'haplotype',
            unique=True,
        ),
    )

    bed_file = db.Column(db.String(512), nullable=False)
    total_bps = db.Column(db.Integer, nullable=False)
    total_haplotypes = db.Column(db.Integer, nullable=False)

    def __repr__(self):
        """Return a label of sample code, run name, call and haplotype."""
        # ``sample`` and ``archaic_analysis_run`` are the backrefs declared
        # by the relationships on Sample and ArchaicAnalysisRun.
        parts = (
            self.sample.code,
            self.archaic_analysis_run.name,
            self.archaic_genome_call,
            self.haplotype,
        )
        return '<ArchaicGenomeData {}:{}:{}:{}>'.format(*parts)
Esempio n. 5
0
class Population(db.Model):
    """Model for a population and its per-run archaic genome helpers.

    A population has a unique code and name, an optional description and
    coordinates, a foreign key to a data source, and a dynamic collection
    of ``Sample`` rows (which receive a ``population`` backref).
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), unique=True, index=True)
    name = db.Column(db.String(128), unique=True)
    description = db.Column(db.String(256))
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    samples = db.relationship('Sample', lazy='dynamic', backref='population')

    # Correlated COUNT of the samples whose population_id is this row's id;
    # ``id`` here is the column declared above in this class body.
    sample_count = column_property(
        select([func.count(Sample.id)])
        .where(Sample.population_id == id)
        .correlate_except(Sample)
    )

    @property
    def archaic_analysis_runs(self):
        """Return the analysis runs that have data for this population."""
        runs = ArchaicAnalysisRun.query.join(ArchaicGenomeData)
        runs = runs.join(Sample).join(Population)
        return runs.filter(Population.id == self.id).all()

    def archaic_genome_data_for_run_query(self, archaic_analysis_run_id):
        """Return a query of this population's genome data for one run."""
        rows = ArchaicGenomeData.query.join(ArchaicAnalysisRun)
        rows = rows.join(Sample).join(Population)
        rows = rows.filter(Population.id == self.id)
        return rows.filter(ArchaicAnalysisRun.id == archaic_analysis_run_id)

    def archaic_genome_data_for_run(self, archaic_analysis_run_id):
        """Return this population's genome data for one run as a list."""
        rows = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return rows.all()

    def samples_with_data_for_run_query(self, archaic_analysis_run_id):
        """Return a query of this population's samples with data in a run."""
        members = Sample.query.join(ArchaicGenomeData)
        members = members.join(ArchaicAnalysisRun)
        members = members.join(Population)
        members = members.filter(
            ArchaicAnalysisRun.id == archaic_analysis_run_id)
        return members.filter(Population.id == self.id)

    def samples_with_data_for_run(self, archaic_analysis_run_id):
        """Return this population's samples with data in a run as a list."""
        members = self.samples_with_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return members.all()

    def avg_archaic_genome_stats(self, archaic_analysis_run_id):
        """Pass this population's run data, as a subquery, to the averager."""
        run_data = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id)
        return archaic_genome_stats_avg(run_data.subquery())

    def __repr__(self):
        """Return a short debugging label containing the population code."""
        template = '<Population {}>'
        return template.format(self.code)
Esempio n. 6
0
class ArchaicAnalysisRun(db.Model):
    """Model for one archaic analysis run.

    A run has a unique, indexed ``name``, optional description, publication
    DOI and date, a foreign key to a data source, and a dynamic collection
    of ``ArchaicGenomeData`` rows (which receive an ``archaic_analysis_run``
    backref).
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship('ArchaicGenomeData',
                                          backref='archaic_analysis_run',
                                          lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or None if unset."""
        publication_url = None
        if self.publication_doi is not None:
            # Was ``.foramt``, which raised AttributeError for any run that
            # had a DOI set.
            publication_url = "https://doi.org/{}".format(self.publication_doi)
        return publication_url

    @hybrid_property
    def sampleids_with_data_query(self):
        """Return a query of ``Sample.id`` values with data in this run.

        Samples are joined through ``ArchaicGenomeData`` to
        ``ArchaicAnalysisRun`` and filtered on this run's ``id``.
        """
        return db.session.query(Sample.id).join(ArchaicGenomeData).\
            join(ArchaicAnalysisRun).\
            filter(ArchaicAnalysisRun.id == self.id)

    @hybrid_property
    def samples_with_data_query(self):
        """Return a ``Sample`` query limited to samples with data in this run."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Return the number of samples that have data in this run."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """Return the list of samples that have data in this run."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Return a ``Sample`` query for samples with no data in this run."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Return the number of samples that have no data in this run."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """Return the list of samples that have no data in this run."""
        return self.samples_without_data_query.all()

    def get_statistics(self, population_id):
        """Return the 'den' and 'neand' shares of one population's totals.

        Sums ``total_bps`` and ``total_haplotypes`` of this run's genome data
        for samples in the given population, grouped by archaic genome call
        and haplotype. Each group's sums are then divided by the grand total
        over all groups, and the shares of the ``'den'`` and ``'neand'``
        calls are accumulated; groups with any other call only contribute to
        the grand totals.

        :param population_id: value matched against ``Sample.population_id``.
        :returns: dict with keys ``neandertal_bp``, ``neandertal_haplotypes``,
            ``denisovan_bp`` and ``denisovan_haplotypes``. All values are 0
            when no rows match; a share is also 0 when its grand total is 0.
        """
        sums = db.session.query(ArchaicGenomeData.archaic_genome_call,
                                ArchaicGenomeData.haplotype,
                                func.sum(ArchaicGenomeData.total_bps).
                                label('total_bp'),
                                func.sum(ArchaicGenomeData.total_haplotypes).
                                label('total_haplotypes')).\
            join(Sample).\
            filter(Sample.population_id == population_id,
                   ArchaicGenomeData.archaic_analysis_run_id == self.id).\
            group_by(ArchaicGenomeData.archaic_genome_call,
                     ArchaicGenomeData.haplotype).all()

        result = {
            'neandertal_bp': 0,
            'neandertal_haplotypes': 0,
            'denisovan_bp': 0,
            'denisovan_haplotypes': 0,
        }
        # Row layout: (archaic_genome_call, haplotype, sum_bps, sum_haps).
        total_bp = sum(s[2] for s in sums)
        total_hap = sum(s[3] for s in sums)

        for s in sums:
            # Guard the divisions: rows can exist while every summed value is
            # zero, which previously raised ZeroDivisionError.
            bp_share = s[2] / total_bp if total_bp else 0
            hap_share = s[3] / total_hap if total_hap else 0

            if s[0] == 'den':
                result['denisovan_bp'] += bp_share
                result['denisovan_haplotypes'] += hap_share

            elif s[0] == 'neand':
                result['neandertal_bp'] += bp_share
                result['neandertal_haplotypes'] += hap_share

        return result

    def __repr__(self):
        """Return a short debugging label containing the run name."""
        return '<ArchaicAnalysisRun {}>'.format(self.name)
Esempio n. 7
0
    # NOTE(review): the enclosing class header is outside this excerpt; the
    # __repr__ below suggests these are columns of the Sample model.
    family_relationship = db.Column(db.String(128))
    comments = db.Column(db.Text)
    # Foreign keys to the population and data_source tables.
    population_id = db.Column(db.Integer, db.ForeignKey('population.id'))
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    # lazy='dynamic': the attribute yields a query object, not a loaded list.
    # The backref adds a ``sample`` attribute on ArchaicGenomeData.
    archaic_genome_data = db.relationship('ArchaicGenomeData',
                                          backref='sample',
                                          lazy='dynamic')

    def __repr__(self):
        """Return a short debugging label containing the sample code."""
        template = '<Sample {}>'
        return template.format(self.code)


# Association table with one foreign key to super_population.id and one to
# population.id.
population_group_table = db.Table(
    'population_group',
    db.Model.metadata,
    db.Column(
        'super_population_id',
        db.Integer,
        db.ForeignKey('super_population.id'),
    ),
    db.Column(
        'population_id',
        db.Integer,
        db.ForeignKey('population.id'),
    ),
)


class Population(db.Model):
    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    name = db.Column(db.String(128), unique=True)
    description = db.Column(db.String(256))
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    samples = db.relationship('Sample', backref='population', lazy='dynamic')

    sample_count = column_property(
        select([func.count(Sample.id)