Ejemplo n.º 1
0
class SuperPopulation(db.Model):
    """Model related many-to-many to ``Population``.

    The link goes through ``population_group_table`` and installs a
    ``super_populations`` backref on ``Population``.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    name = db.Column(db.String(128))
    color = db.Column(db.String(32))
    data_source_id = db.Column(db.Integer, db.ForeignKey("data_source.id"))
    populations = db.relationship(
        "Population",
        secondary=population_group_table,
        backref=db.backref("super_populations", lazy=True),
    )

    # TODO Make this a hybrid_property?
    def samples(self):
        """Return the set of samples found across all related populations."""
        return {
            member
            for group in self.populations
            for member in group.samples
        }

    @property
    def population_count(self):
        """Count the related populations with a query on this object's session."""
        session = db.Session.object_session(self)
        members = session.query(Population).with_parent(self, "populations")
        return members.count()

    @property
    def sample_count(self):
        """Count samples joined to the related populations via a query."""
        session = db.Session.object_session(self)
        joined = session.query(Sample).join(Population)
        return joined.with_parent(self, "populations").count()

    def __repr__(self):
        template = '<SuperPopulation {}>'
        return template.format(self.code)
Ejemplo n.º 2
0
class DataSource(db.Model):
    """Named data source related to DOIs, populations, samples and runs.

    Every relationship below also installs a backref on the related model:
    ``data_sources`` for the many-to-many DOI link (through
    ``data_source_doi_table``) and ``data_source`` for all the others.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    dois = db.relationship("DigitalObjectIdentifier",
                           secondary=data_source_doi_table,
                           backref=db.backref("data_sources", lazy=True))
    super_populations = db.relationship("SuperPopulation",
                                        backref=db.backref("data_source",
                                                           lazy=True))
    populations = db.relationship("Population",
                                  backref=db.backref("data_source",
                                                     lazy=True))
    # Backward-compatible alias: the relationship used to be declared under
    # the misspelled name ``popoulations``; keep that name resolving to the
    # same relationship so existing callers do not break.
    popoulations = db.synonym("populations")
    samples = db.relationship("Sample",
                              backref=db.backref("data_source", lazy=True))
    archaic_analysis_runs = db.relationship("ArchaicAnalysisRun",
                                            backref=db.backref("data_source",
                                                               lazy=True))

    def __repr__(self):
        return '<DataSource {}>'.format(self.name)
Ejemplo n.º 3
0
class ArchaicAnalysisRun(db.Model):
    """Analysis run model with helpers to find samples with/without data.

    ``archaic_genome_data`` is a dynamic relationship to
    ``ArchaicGenomeData`` and installs an ``archaic_analysis_run`` backref.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship('ArchaicGenomeData',
                                          backref='archaic_analysis_run',
                                          lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or None if unset."""
        if self.publication_doi is None:
            return None
        # Fixed: the original called the non-existent ``str.foramt``, which
        # raised AttributeError whenever a DOI was present.
        return "https://doi.org/{}".format(self.publication_doi)

    @hybrid_property
    def sampleids_with_data_query(self):
        """Query of ``Sample.id`` values joined to genome data of this run."""
        return db.session.query(Sample.id).join(ArchaicGenomeData).\
            join(ArchaicAnalysisRun).\
            filter(ArchaicAnalysisRun.id == self.id)

    @hybrid_property
    def samples_with_data_query(self):
        """Query of samples whose id is in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Number of rows returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """List of samples returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Query of samples whose id is NOT in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Number of rows returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """List of samples returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.all()

    def __repr__(self):
        return '<ArchaicAnalysisRun {}>'.format(self.name)
Ejemplo n.º 4
0
class Sample(db.Model):
    """Sample model linked to a population and a data source by foreign key.

    ``archaic_genome_data`` is a dynamic relationship to
    ``ArchaicGenomeData`` and installs a ``sample`` backref on it.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    family_code = db.Column(db.String(32), index=True)
    gender = db.Column(db.String(32))
    family_relationship = db.Column(db.String(128))
    comments = db.Column(db.Text)
    population_id = db.Column(db.Integer, db.ForeignKey("population.id"))
    data_source_id = db.Column(db.Integer, db.ForeignKey("data_source.id"))
    archaic_genome_data = db.relationship(
        "ArchaicGenomeData",
        backref="sample",
        lazy="dynamic",
    )

    def __repr__(self):
        template = '<Sample {}>'
        return template.format(self.code)
Ejemplo n.º 5
0
class Population(db.Model):
    """Population model with query helpers scoped to an analysis run.

    ``samples`` is a dynamic relationship to ``Sample`` and installs a
    ``population`` backref on it.
    """

    id = db.Column(db.Integer, primary_key=True)
    code = db.Column(db.String(32), index=True, unique=True)
    name = db.Column(db.String(128), unique=True)
    description = db.Column(db.String(256))
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    data_source_id = db.Column(db.Integer, db.ForeignKey("data_source.id"))
    samples = db.relationship("Sample", backref="population", lazy="dynamic")

    # Correlated scalar subquery: count of Sample rows whose population_id
    # matches this row's id column.
    sample_count = column_property(
        select([func.count(Sample.id)])
        .where(Sample.population_id == id)
        .correlate_except(Sample)
    )

    @property
    def archaic_analysis_runs(self):
        """List analysis runs joined through genome data and samples to here."""
        runs = ArchaicAnalysisRun.query.join(ArchaicGenomeData)
        runs = runs.join(Sample).join(Population)
        return runs.filter(Population.id == self.id).all()

    def archaic_genome_data_for_run_query(self, archaic_analysis_run_id):
        """Build the genome-data query for this population and the given run."""
        joined = ArchaicGenomeData.query.join(ArchaicAnalysisRun)
        joined = joined.join(Sample).join(Population)
        scoped = joined.filter(Population.id == self.id)
        return scoped.filter(ArchaicAnalysisRun.id == archaic_analysis_run_id)

    def archaic_genome_data_for_run(self, archaic_analysis_run_id):
        """Return the rows of ``archaic_genome_data_for_run_query`` as a list."""
        query = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return query.all()

    def samples_with_data_for_run_query(self, archaic_analysis_run_id):
        """Build the query of this population's samples with data in the run."""
        joined = Sample.query.join(ArchaicGenomeData)
        joined = joined.join(ArchaicAnalysisRun).join(Population)
        in_run = joined.filter(ArchaicAnalysisRun.id == archaic_analysis_run_id)
        return in_run.filter(Population.id == self.id)

    def samples_with_data_for_run(self, archaic_analysis_run_id):
        """Return the rows of ``samples_with_data_for_run_query`` as a list."""
        query = self.samples_with_data_for_run_query(
            archaic_analysis_run_id=archaic_analysis_run_id)
        return query.all()

    def avg_archaic_genome_stats(self, archaic_analysis_run_id):
        """Pass this population's run data, as a subquery, to the averager."""
        run_data = self.archaic_genome_data_for_run_query(
            archaic_analysis_run_id)
        return archaic_genome_stats_avg(run_data.subquery())

    def __repr__(self):
        template = '<Population {}>'
        return template.format(self.code)
Ejemplo n.º 6
0
class ArchaicAnalysisRun(db.Model):
    """Analysis run model with sample lookups and per-population statistics.

    ``archaic_genome_data`` is a dynamic relationship to
    ``ArchaicGenomeData`` and installs an ``archaic_analysis_run`` backref.
    """

    id = db.Column(db.Integer, primary_key=True)
    name = db.Column(db.String(128), index=True, unique=True)
    description = db.Column(db.Text)
    publication_doi = db.Column(db.String(256))
    date = db.Column(db.DateTime)
    data_source_id = db.Column(db.Integer, db.ForeignKey('data_source.id'))
    archaic_genome_data = db.relationship('ArchaicGenomeData',
                                          backref='archaic_analysis_run',
                                          lazy='dynamic')

    @hybrid_property
    def publication_url(self):
        """Return the ``https://doi.org/`` URL for the DOI, or None if unset."""
        if self.publication_doi is None:
            return None
        # Fixed: the original called the non-existent ``str.foramt``, which
        # raised AttributeError whenever a DOI was present.
        return "https://doi.org/{}".format(self.publication_doi)

    @hybrid_property
    def sampleids_with_data_query(self):
        """Query of ``Sample.id`` values joined to genome data of this run."""
        return db.session.query(Sample.id).join(ArchaicGenomeData).\
            join(ArchaicAnalysisRun).\
            filter(ArchaicAnalysisRun.id == self.id)

    @hybrid_property
    def samples_with_data_query(self):
        """Query of samples whose id is in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.in_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_with_data_count(self):
        """Number of rows returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.count()

    @hybrid_property
    def samples_with_data(self):
        """List of samples returned by ``samples_with_data_query``."""
        return self.samples_with_data_query.all()

    @hybrid_property
    def samples_without_data_query(self):
        """Query of samples whose id is NOT in ``sampleids_with_data_query``."""
        return Sample.query.filter(
            Sample.id.notin_(self.sampleids_with_data_query))

    @hybrid_property
    def samples_without_data_count(self):
        """Number of rows returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.count()

    @hybrid_property
    def samples_without_data(self):
        """List of samples returned by ``samples_without_data_query``."""
        return self.samples_without_data_query.all()

    def get_statistics(self, population_id):
        """Return the 'neand' and 'den' shares of this run for one population.

        Sums ``total_bps`` and ``total_haplotypes`` over this run's
        ``ArchaicGenomeData`` rows for samples with the given
        ``population_id``, grouped by archaic genome call and haplotype, then
        divides each 'neand' / 'den' group by the grand total over all groups.

        Returns:
            dict with keys ``neandertal_bp``, ``neandertal_haplotypes``,
            ``denisovan_bp`` and ``denisovan_haplotypes``. A value stays 0
            when there are no matching rows or the matching grand total is
            zero (the original raised ZeroDivisionError in the latter case).
        """
        sums = db.session.query(ArchaicGenomeData.archaic_genome_call,
                                ArchaicGenomeData.haplotype,
                                func.sum(ArchaicGenomeData.total_bps).
                                label('total_bp'),
                                func.sum(ArchaicGenomeData.total_haplotypes).
                                label('total_haplotypes')).\
            join(Sample).\
            filter(Sample.population_id == population_id,
                   ArchaicGenomeData.archaic_analysis_run_id == self.id).\
            group_by(ArchaicGenomeData.archaic_genome_call,
                     ArchaicGenomeData.haplotype).all()

        result = {
            'neandertal_bp': 0,
            'neandertal_haplotypes': 0,
            'denisovan_bp': 0,
            'denisovan_haplotypes': 0,
        }

        # A SQL SUM over only NULL values comes back as None; treat it as
        # zero so the totals below cannot fail with a TypeError.
        rows = [(call, bps or 0, haps or 0)
                for call, _haplotype, bps, haps in sums]
        total_bp = sum(bps for _call, bps, _haps in rows)
        total_hap = sum(haps for _call, _bps, haps in rows)

        # Maps the stored call code to the prefix of the result keys.
        prefixes = {'den': 'denisovan', 'neand': 'neandertal'}
        for call, bps, haps in rows:
            prefix = prefixes.get(call)
            if prefix is None:
                # Other calls only contribute to the grand totals.
                continue
            # Skip the division when a grand total is zero.
            if total_bp:
                result[prefix + '_bp'] += bps / total_bp
            if total_hap:
                result[prefix + '_haplotypes'] += haps / total_hap

        return result

    def __repr__(self):
        return '<ArchaicAnalysisRun {}>'.format(self.name)