class SuperPopulation(db.Model):
    """ORM model for a named, colour-coded grouping of populations.

    Populations are attached through the ``population_group_table``
    association table; each ``Population`` gets a ``super_populations``
    backref in return.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    name = db.Column(db.String(128))
    color = db.Column(db.String(32))
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    populations = db.relationship(
        'Population',
        secondary=population_group_table,
        backref=db.backref('super_populations', lazy=True),
    )

    # TODO Make this a hybrid_property?
    def samples(self):
        """Return the set of samples found in any member population."""
        return {
            member
            for group in self.populations
            for member in group.samples
        }

    @property
    def population_count(self):
        """Number of populations attached to this super population."""
        session = db.Session.object_session(self)
        members = session.query(Population).with_parent(self, "populations")
        return members.count()

    @property
    def sample_count(self):
        """Number of samples joined to this super population's populations."""
        session = db.Session.object_session(self)
        joined = session.query(Sample).join(Population)
        return joined.with_parent(self, "populations").count()

    def __repr__(self):
        return f'<SuperPopulation {self.code}>'
class DataSource(db.Model):
    """ORM model for a named data source.

    Super populations, populations, samples and archaic analysis runs each
    reference a data source; every one of those models receives a
    ``data_source`` backref from the relationships declared here.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    dois = db.relationship(
        "DigitalObjectIdentifier",
        secondary=data_source_doi_table,
        backref=db.backref("data_sources", lazy=True),
    )
    super_populations = db.relationship(
        "SuperPopulation", backref=db.backref("data_source", lazy=True))
    populations = db.relationship(
        "Population", backref=db.backref("data_source", lazy=True))
    # The relationship was originally declared under the misspelled name
    # ``popoulations``. Keep that spelling working for existing callers by
    # mapping it as a synonym of the correctly named relationship.
    popoulations = db.synonym("populations")
    samples = db.relationship(
        "Sample", backref=db.backref("data_source", lazy=True))
    archaic_analysis_runs = db.relationship(
        "ArchaicAnalysisRun", backref=db.backref("data_source", lazy=True))

    def __repr__(self):
        return '<DataSource {}>'.format(self.name)
# NOTE(review): a second ``ArchaicAnalysisRun`` class is defined further down
# in this file; confirm which definition is meant to be kept.
class ArchaicAnalysisRun(db.Model):
    """ORM model for one archaic analysis run and its genome data.

    Besides the stored columns, the class exposes helper attributes that
    build queries for the samples that do, or do not, have
    ``ArchaicGenomeData`` rows belonging to this run.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship(
        'ArchaicGenomeData', backref='archaic_analysis_run', lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or ``None``."""
        if self.publication_doi is None:
            return None
        # Was ``.foramt``, which raised AttributeError for every run that
        # had a DOI set.
        return "https://doi.org/{}".format(self.publication_doi)

    @hybrid_property
    def sampleids_with_data_query(self):
        """Query of ``Sample.id`` values that have genome data in this run."""
        return (
            db.session.query(Sample.id)
            .join(ArchaicGenomeData)
            .join(ArchaicAnalysisRun)
            .filter(ArchaicAnalysisRun.id == self.id)
        )

    @hybrid_property
    def samples_with_data_query(self):
        """Query of samples whose id is in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Row count of ``samples_with_data_query``."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """List of samples returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Query of samples whose id is not in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Row count of ``samples_without_data_query``."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """List of samples returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.all()

    def __repr__(self):
        return '<ArchaicAnalysisRun {}>'.format(self.name)
class Sample(db.Model):
    """ORM model for a single sample.

    A sample carries a unique ``code``, optional family details and free-text
    comments, and points at one population and one data source. Its
    ``archaic_genome_data`` relationship is loaded dynamically and gives
    each ``ArchaicGenomeData`` row a ``sample`` backref.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    family_code = db.Column(db.String(32), index=True)
    gender = db.Column(db.String(32))
    family_relationship = db.Column(db.String(128))
    comments = db.Column(db.Text)
    population_id = db.Column(db.Integer, db.ForeignKey('population.id'))
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship(
        'ArchaicGenomeData',
        backref='sample',
        lazy='dynamic',
    )

    def __repr__(self):
        return f'<Sample {self.code}>'
class Population(db.Model):
    """ORM model for a population and its per-run archaic genome helpers.

    ``sample_count`` is a column property backed by a correlated
    ``COUNT(Sample.id)`` subquery. The remaining helpers build queries that
    join samples, genome data and analysis runs for this population.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    name = db.Column(db.String(128), unique=True)
    description = db.Column(db.String(256))
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    samples = db.relationship(
        'Sample',
        backref='population',
        lazy='dynamic',
    )
    sample_count = column_property(
        select([func.count(Sample.id)])
        .where(Sample.population_id == id)
        .correlate_except(Sample)
    )

    @property
    def archaic_analysis_runs(self):
        """List the analysis runs joined to this population's samples."""
        runs = (
            ArchaicAnalysisRun.query
            .join(ArchaicGenomeData)
            .join(Sample)
            .join(Population)
            .filter(Population.id == self.id)
        )
        return runs.all()

    def archaic_genome_data_for_run_query(self, archaic_analysis_run_id):
        """Build the genome-data query for this population and one run."""
        joined = (
            ArchaicGenomeData.query
            .join(ArchaicAnalysisRun)
            .join(Sample)
            .join(Population)
        )
        for_population = joined.filter(Population.id == self.id)
        return for_population.filter(
            ArchaicAnalysisRun.id == archaic_analysis_run_id)

    def archaic_genome_data_for_run(self, archaic_analysis_run_id):
        """Return the rows of ``archaic_genome_data_for_run_query``."""
        query = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return query.all()

    def samples_with_data_for_run_query(self, archaic_analysis_run_id):
        """Build the query of this population's samples with data in a run."""
        joined = (
            Sample.query
            .join(ArchaicGenomeData)
            .join(ArchaicAnalysisRun)
            .join(Population)
        )
        for_run = joined.filter(
            ArchaicAnalysisRun.id == archaic_analysis_run_id)
        return for_run.filter(Population.id == self.id)

    def samples_with_data_for_run(self, archaic_analysis_run_id):
        """Return the rows of ``samples_with_data_for_run_query``."""
        query = self.samples_with_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return query.all()

    def avg_archaic_genome_stats(self, archaic_analysis_run_id):
        """Pass this population's genome data for a run, as a subquery,
        to ``archaic_genome_stats_avg`` and return its result."""
        genome_data = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id)
        return archaic_genome_stats_avg(genome_data.subquery())

    def __repr__(self):
        return f'<Population {self.code}>'
# NOTE(review): an earlier ``ArchaicAnalysisRun`` class is defined above in
# this file; confirm which definition is meant to be kept.
class ArchaicAnalysisRun(db.Model):
    """ORM model for one archaic analysis run and its genome data.

    Besides the stored columns, the class exposes helper attributes that
    build queries for the samples that do, or do not, have
    ``ArchaicGenomeData`` rows belonging to this run, plus
    ``get_statistics`` for per-population summary fractions.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship(
        'ArchaicGenomeData', backref='archaic_analysis_run', lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or ``None``."""
        if self.publication_doi is None:
            return None
        # Was ``.foramt``, which raised AttributeError for every run that
        # had a DOI set.
        return "https://doi.org/{}".format(self.publication_doi)

    @hybrid_property
    def sampleids_with_data_query(self):
        """Query of ``Sample.id`` values that have genome data in this run."""
        return (
            db.session.query(Sample.id)
            .join(ArchaicGenomeData)
            .join(ArchaicAnalysisRun)
            .filter(ArchaicAnalysisRun.id == self.id)
        )

    @hybrid_property
    def samples_with_data_query(self):
        """Query of samples whose id is in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Row count of ``samples_with_data_query``."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """List of samples returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Query of samples whose id is not in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Row count of ``samples_without_data_query``."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """List of samples returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.all()

    def get_statistics(self, population_id):
        """Return archaic-call fractions for one population in this run.

        Genome data for the population's samples is grouped by
        ``archaic_genome_call`` and ``haplotype``; the summed base pairs and
        haplotype counts of each group are divided by the totals over all
        returned groups. Groups whose call is ``'den'`` accumulate into the
        ``denisovan_*`` keys and ``'neand'`` into the ``neandertal_*`` keys;
        other call values only contribute to the totals.

        Args:
            population_id: ``Sample.population_id`` value to filter on.

        Returns:
            dict with keys ``neandertal_bp``, ``neandertal_haplotypes``,
            ``denisovan_bp`` and ``denisovan_haplotypes``. A value stays 0
            when there is no matching data or the corresponding total is 0.
        """
        sums = db.session.query(
            ArchaicGenomeData.archaic_genome_call,
            ArchaicGenomeData.haplotype,
            func.sum(ArchaicGenomeData.total_bps).label('total_bp'),
            func.sum(ArchaicGenomeData.total_haplotypes).
            label('total_haplotypes'),
        ).join(Sample).filter(
            Sample.population_id == population_id,
            ArchaicGenomeData.archaic_analysis_run_id == self.id,
        ).group_by(
            ArchaicGenomeData.archaic_genome_call,
            ArchaicGenomeData.haplotype,
        ).all()

        result = {
            'neandertal_bp': 0,
            'neandertal_haplotypes': 0,
            'denisovan_bp': 0,
            'denisovan_haplotypes': 0,
        }

        # SQL SUM() is NULL (None) when every value in a group is NULL;
        # count such groups as 0 instead of failing on None arithmetic.
        total_bp = sum(s[2] or 0 for s in sums)
        total_hap = sum(s[3] or 0 for s in sums)

        key_prefix = {'den': 'denisovan', 'neand': 'neandertal'}
        for s in sums:
            prefix = key_prefix.get(s[0])
            if prefix is None:
                continue
            # Skip the division when the total is 0 so groups that sum to
            # nothing do not raise ZeroDivisionError.
            if total_bp:
                result[prefix + '_bp'] += (s[2] or 0) / total_bp
            if total_hap:
                result[prefix + '_haplotypes'] += (s[3] or 0) / total_hap
        return result

    def __repr__(self):
        return '<ArchaicAnalysisRun {}>'.format(self.name)