class Outcome(db.Model, Base):
    """
    Outcome reported for a participant.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to outcome by contributor
    :param vital_status: Vital status of the participant
    :param disease_related: Whether the cause of death was disease
        related; stored as free text rather than a boolean
    :param age_at_event_days: Age at the time the outcome occurred,
        in number of days since birth
    :param participant_id: kf_id of the participant this outcome was
        reported for (required)
    """

    __tablename__ = "outcome"
    __prefix__ = "OC"

    external_id = db.Column(db.Text(), doc="external id used by contributor")
    vital_status = db.Column(db.Text(), doc="The vital status reported")
    disease_related = db.Column(db.Text())
    age_at_event_days = db.Column(
        db.Integer(),
        doc=(
            "Age at the time of event occurred in "
            "number of days since birth."
        ),
    )
    # Every outcome must point at an existing participant row.
    participant_id = db.Column(
        KfId(),
        db.ForeignKey("participant.kf_id"),
        nullable=False,
        doc="kf_id of the participant this outcome was reported for",
    )
Exemple #2
0
class ReadGroup(db.Model, Base):
    """
    ReadGroup entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_id: Name given to read group by the contributor
    :param flow_cell: The identifier of the group's flow cell
    :param lane_number: The group's lane
    :param quality_scale: The scale used to encode quality scores
    """

    __tablename__ = "read_group"
    __prefix__ = "RG"

    external_id = db.Column(
        db.Text(),
        nullable=True,
        doc="Name given to read group by the contributor",
    )
    flow_cell = db.Column(
        db.Text(), doc="The identifier of the group's flow cell"
    )
    lane_number = db.Column(db.Float(), doc="The group's lane")
    quality_scale = db.Column(
        db.Text(), doc="The scale used to encode quality scores"
    )

    # Expose the genomic files reachable through the association rows;
    # appending a file wraps it in a new ReadGroupGenomicFile.
    genomic_files = association_proxy(
        "read_group_genomic_files",
        "genomic_file",
        creator=lambda linked_file: ReadGroupGenomicFile(
            genomic_file=linked_file
        ),
    )

    # Association rows; each row gains a `read_group` backref.
    read_group_genomic_files = db.relationship(
        "ReadGroupGenomicFile",
        backref="read_group",
        cascade="all, delete-orphan",
    )
class Investigator(db.Model, Base):
    """
    Investigator entity, linked to its studies through ``studies``.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to investigator by contributor
    :param name: Name of the investigator
    :param institution: Institution of the investigator
    """

    __tablename__ = "investigator"
    __prefix__ = "IG"

    external_id = db.Column(db.Text(), doc="external id used by contributor")
    name = db.Column(db.Text(), doc="The name of the investigator")
    institution = db.Column(
        db.Text(), doc="The name of the investigator's institution"
    )
    # Each related Study gains an `investigator` backref.
    studies = db.relationship(
        Study,
        backref=db.backref("investigator", lazy=True),
        doc="kf_id of the studies belonging to this investigator",
    )

    def __repr__(self):
        """Return a short debug representation containing the kf_id."""
        return "<Investigator {kf_id}>".format(kf_id=self.kf_id)
class BiospecimenDiagnosis(db.Model, Base):
    """
    Association row linking one biospecimen to one diagnosis.

    A given (diagnosis_id, biospecimen_id) pair may be stored only once.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param diagnosis_id: kf_id of the linked diagnosis (required)
    :param biospecimen_id: kf_id of the linked biospecimen (required)
    :param external_id: External id used by contributor
    """

    __tablename__ = "biospecimen_diagnosis"
    __prefix__ = "BD"
    __table_args__ = (
        db.UniqueConstraint("diagnosis_id", "biospecimen_id"),
    )

    diagnosis_id = db.Column(
        KfId(), db.ForeignKey("diagnosis.kf_id"), nullable=False
    )

    biospecimen_id = db.Column(
        KfId(), db.ForeignKey("biospecimen.kf_id"), nullable=False
    )
    external_id = db.Column(db.Text(), doc="external id used by contributor")

    # Both parents expose their association rows as `biospecimen_diagnoses`.
    biospecimen = db.relationship(
        Biospecimen,
        backref=db.backref(
            "biospecimen_diagnoses", cascade="all, delete-orphan"
        ),
    )

    diagnosis = db.relationship(
        Diagnosis,
        backref=db.backref(
            "biospecimen_diagnoses", cascade="all, delete-orphan"
        ),
    )
Exemple #5
0
class CavaticaTask(db.Model, Base):
    """
    CavaticaTask entity represents an executed Cavatica task

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param name: Name of cavatica_task
    :param external_cavatica_task_id: Id of executed task assigned by Cavatica
    :param cavatica_app_id: kf_id of the Cavatica app this task belongs to
    """
    __tablename__ = 'cavatica_task'
    __prefix__ = 'CT'

    # The adjacent literals are concatenated, so the first piece must end
    # with a space (previously rendered as "...taskby Cavatica").
    external_cavatica_task_id = db.Column(UUID(as_uuid=True),
                                          doc='Id assigned to Cavatica task '
                                          'by Cavatica')
    name = db.Column(db.Text(), doc='Name given to Cavatica task by user')

    cavatica_app_id = db.Column(KfId(),
                                db.ForeignKey('cavatica_app.kf_id'),
                                doc='Id for the Cavatica app to which this '
                                'task belongs')

    # Genomic files reachable through the association rows. Appending a file
    # creates a CavaticaTaskGenomicFile whose is_input is copied from the
    # file's is_harmonized flag.
    # NOTE(review): confirm that polarity (harmonized == input) is intended.
    genomic_files = association_proxy(
        'cavatica_task_genomic_files',
        'genomic_file',
        creator=lambda genomic_file: CavaticaTaskGenomicFile(
            genomic_file=genomic_file, is_input=genomic_file.is_harmonized))

    # Association rows; each row gains a `cavatica_task` backref.
    cavatica_task_genomic_files = db.relationship('CavaticaTaskGenomicFile',
                                                  backref='cavatica_task',
                                                  cascade='all, delete-orphan')
class TimestampMixin:
    """
    Mixin contributing the creation and modification timestamp columns
    shared by the Kids First tables.

    ``datetime.now`` is passed uncalled as the column default / onupdate
    value, not as a fixed timestamp.
    """

    created_at = db.Column(
        db.DateTime(), default=datetime.now, doc="Time of object creation"
    )
    modified_at = db.Column(
        db.DateTime(),
        default=datetime.now,
        onupdate=datetime.now,
        doc="Time of last modification",
    )
Exemple #7
0
class Family(db.Model, Base):
    """
    Family entity grouping participants.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to the family by contributor
    :param family_type: Denotes type of family, e.g. duo, trio
    """

    __tablename__ = "family"
    __prefix__ = "FM"

    external_id = db.Column(db.Text(), doc="ID used by external study")
    family_type = db.Column(db.Text(), doc="Denotes type of family")
    # Each Participant gains a `family` backref.
    participants = db.relationship(Participant, backref="family")
class Base(IDMixin, TimestampMixin):
    """
    Common base for the SQLAlchemy models: combines the id and timestamp
    mixins and adds a visibility flag.

    :param visible: Flags visibility of data from the dataservice
    """

    # Non-nullable; the database itself supplies 'true' when no value
    # is given.
    visible = db.Column(
        db.Boolean(),
        nullable=False,
        server_default="true",
        doc="Flags visibility of data from the dataservice",
    )
class SequencingCenter(db.Model, Base):
    """
    SequencingCenter entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_id: External id used by contributor
    :param name: Name given to sequencing center by contributor
        (required and unique)
    """

    __tablename__ = "sequencing_center"
    __prefix__ = "SC"

    external_id = db.Column(db.Text(), doc="external id used by contributor")
    name = db.Column(
        db.Text(),
        nullable=False,
        unique=True,
        doc="Name given to sequencing center by contributor",
    )
    # Both child models gain a `sequencing_center` backref.
    sequencing_experiments = db.relationship(
        SequencingExperiment,
        backref=db.backref("sequencing_center", lazy=True),
    )
    biospecimens = db.relationship(
        Biospecimen,
        backref=db.backref("sequencing_center", lazy=True),
    )
class CavaticaApp(db.Model, Base):
    """
    CavaticaApp entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_cavatica_app_id: Id given to Cavatica app by user
    :param name: Name given to Cavatica app by user
    :param revision: Revision number of the Cavatica app
    :param github_commit_url: GitHub URL to the last git commit made for app
    """

    __tablename__ = "cavatica_app"
    __prefix__ = "CA"

    external_cavatica_app_id = db.Column(
        db.Text(),
        doc="Id given to Cavatica app by Cavatica user",
    )
    name = db.Column(
        db.Text(),
        doc="Name given to Cavatica app by Cavatica user",
    )
    revision = db.Column(
        db.Integer(),
        doc="Revision number of the Cavatica app assigned by Cavatica user",
    )
    github_commit_url = db.Column(
        db.Text(),
        doc="Link to git commit on GitHub",
    )
    # Each Task gains a `cavatica_app` backref.
    tasks = db.relationship(Task, backref="cavatica_app")
class TaskGenomicFile(db.Model, Base):
    """
    Association row linking a task to a genomic file.

    The combination (genomic_file_id, task_id, is_input) is unique, so a
    file may be attached to the same task once as input and once as output.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param genomic_file_id: kf_id of the linked genomic file (required)
    :param task_id: kf_id of the linked task (required)
    :param is_input: Denotes whether the genomic file was an input to the
        executed task. True = Input, False = Output. Defaults to True.
    """

    __tablename__ = "task_genomic_file"
    __prefix__ = "TG"
    __table_args__ = (
        db.UniqueConstraint("genomic_file_id", "task_id", "is_input"),
    )

    genomic_file_id = db.Column(
        KfId(),
        db.ForeignKey("genomic_file.kf_id"),
        nullable=False,
    )

    task_id = db.Column(
        KfId(),
        db.ForeignKey("task.kf_id"),
        nullable=False,
    )
    is_input = db.Column(db.Boolean(), nullable=False, default=True)
Exemple #12
0
class ReadGroupGenomicFile(db.Model, Base):
    """
    Association row linking a read group to a genomic file.

    A given (read_group_id, genomic_file_id) pair may be stored only once.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param read_group_id: kf_id of the linked read group (required)
    :param genomic_file_id: kf_id of the linked genomic file (required)
    :param external_id: External id used by contributor
    """

    __tablename__ = "read_group_genomic_file"
    __prefix__ = "RF"
    __table_args__ = (
        db.UniqueConstraint("read_group_id", "genomic_file_id"),
    )

    read_group_id = db.Column(
        KfId(), db.ForeignKey("read_group.kf_id"), nullable=False
    )

    genomic_file_id = db.Column(
        KfId(), db.ForeignKey("genomic_file.kf_id"), nullable=False
    )
    external_id = db.Column(db.Text(), doc="external id used by contributor")
class IDMixin:
    """
    Mixin contributing the identifier columns shared by Kids First tables.

    Subclasses override ``__prefix__``; the value is handed to
    ``kf_id_generator`` to build the default for that class's ``kf_id``.
    """

    __prefix__ = "__"

    @declared_attr
    def kf_id(cls):
        """Build the primary-key column using the class's own prefix."""
        return db.Column(
            KfId(),
            primary_key=True,
            doc="ID assigned by Kids First",
            default=kf_id_generator(cls.__prefix__),
        )

    # Secondary unique identifier, filled in by uuid_generator by default.
    uuid = db.Column(UUID(), unique=True, default=uuid_generator)
class Study(db.Model, Base):
    """
    Study entity representing the dbGaP study.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param data_access_authority: Name of organization which governs data
        access (required, defaults to 'dbGaP')
    :param external_id: dbGaP accession number (required)
    :param version: dbGaP version
    :param name: Name or title of study
    :param short_name: Short name for study
    :param attribution: Link to attribution prose provided by dbGaP
    :param release_status: Release status of the study
    :param investigator_id: kf_id of the study's investigator
    """

    __tablename__ = "study"
    __prefix__ = "SD"

    data_access_authority = db.Column(
        db.Text(), nullable=False, default="dbGaP"
    )

    external_id = db.Column(
        db.Text(), nullable=False, doc="dbGaP accession number"
    )
    version = db.Column(db.Text(), doc="dbGaP version")
    name = db.Column(db.Text(), doc="Name or title of study")
    short_name = db.Column(db.Text(), doc="Short name for study")
    attribution = db.Column(
        db.Text(), doc="Link to attribution prose provided by dbGaP"
    )
    release_status = db.Column(db.Text(), doc="Release status of the study")

    # Participants and study files are owned by the study: the cascade
    # removes them together with it. Each child gains a `study` backref.
    participants = db.relationship(
        Participant, cascade="all, delete-orphan", backref="study"
    )
    investigator_id = db.Column(KfId(), db.ForeignKey("investigator.kf_id"))
    study_files = db.relationship(
        StudyFile, cascade="all, delete-orphan", backref="study"
    )

    def __repr__(self):
        """Return a short debug representation containing the kf_id."""
        return "<Study {kf_id}>".format(kf_id=self.kf_id)
Exemple #15
0
class Phenotype(db.Model, Base):
    """
    Phenotype entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to phenotype by contributor
    :param source_text_phenotype: Name given to Phenotype by contributor
    :param hpo_id_phenotype: The ID of the term from the Human Phenotype
        Ontology which represents a harmonized phenotype
    :param snomed_id_phenotype: The ID of the term from Systematized
        Nomenclature of Medicine -- Clinical Terms which encodes clinical
        terminology
    :param observed: Whether phenotype is negative or positive
    :param age_at_event_days: Age at the time phenotype was observed,
        expressed in number of days since birth
    :param participant_id: kf_id of the participant (required)
    """

    __tablename__ = "phenotype"
    __prefix__ = "PH"

    external_id = db.Column(db.Text(), doc="external id used by contributor")
    source_text_phenotype = db.Column(
        db.Text(),
        doc="Name given to Phenotype by contributor",
    )
    hpo_id_phenotype = db.Column(
        db.Text(),
        doc=(
            "The ID of the term from Human Phenotype Ontology "
            "which represents a harmonized phenotype"
        ),
    )
    snomed_id_phenotype = db.Column(
        db.Text(),
        doc=(
            "The ID of the term from Systematized Nomenclature of "
            "Medicine --Clinical Terms which encodes clinical terminology"
        ),
    )
    observed = db.Column(
        db.Text(),
        doc="whether phenotype is negative or positive",
    )
    age_at_event_days = db.Column(
        db.Integer(),
        doc=(
            "Age at the time of event occurred in "
            "number of days since birth"
        ),
    )
    participant_id = db.Column(
        KfId(),
        db.ForeignKey("participant.kf_id"),
        nullable=False,
    )
class SequencingExperiment(db.Model, Base):
    """
    SequencingExperiment entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_id: Name given to sequencing experiment by contributor
        (required)
    :param experiment_date: Date of the sequencing experiment conducted
    :param experiment_strategy: Text term that represents the library
        strategy (required)
    :param library_name: Text term that represents the name of the library
    :param library_strand: Text term that represents the library
        stranded-ness
    :param is_paired_end: Boolean term specifies whether reads have paired
        end (required)
    :param platform: Name of the platform used to obtain data (required)
    :param instrument_model: Text term that represents the model of
        instrument
    :param max_insert_size: Maximum size of the fragmented DNA
    :param mean_insert_size: Mean size of the fragmented DNA
    :param mean_depth: (Coverage) Describes the amount of sequence data that
        is available per position in the sequenced genome territory
    :param total_reads: Total reads of the sequencing experiment
    :param mean_read_length: Mean length of the reads
    :param sequencing_center_id: The kf_id of the sequencing center
        (required)
    """

    __tablename__ = "sequencing_experiment"
    __prefix__ = "SE"

    external_id = db.Column(
        db.Text(),
        nullable=False,
        doc="Name given to sequencing experiment by contributor",
    )
    experiment_date = db.Column(
        db.DateTime(),
        doc="Date of the sequencing experiment conducted",
    )
    experiment_strategy = db.Column(
        db.Text(),
        nullable=False,
        doc="Text term that represents the Library strategy",
    )
    library_name = db.Column(
        db.Text(),
        doc="Text term that represents the name of the library",
    )
    library_strand = db.Column(
        db.Text(),
        doc="Text term that represents the library stranded-ness",
    )
    is_paired_end = db.Column(
        db.Boolean(),
        nullable=False,
        doc="Boolean term specifies whether reads have paired end",
    )
    platform = db.Column(
        db.Text(),
        nullable=False,
        doc="Name of the platform used to obtain data",
    )
    instrument_model = db.Column(
        db.Text(),
        doc="Text term that represents the model of instrument",
    )
    max_insert_size = db.Column(
        db.Integer(),
        doc="Maximum size of the fragmented DNA",
    )
    mean_insert_size = db.Column(
        db.Float(),
        doc="Mean size of the fragmented DNA",
    )
    mean_depth = db.Column(
        db.Float(),
        doc=(
            "Mean depth or coverage describes the amount of sequence "
            "data that is available per position in the sequenced "
            "genome territory"
        ),
    )
    total_reads = db.Column(
        db.Integer(),
        doc="Total reads of the sequencing experiment",
    )
    # NOTE: the 'lenth' spelling in the column doc is kept as-is so the
    # emitted text stays unchanged.
    mean_read_length = db.Column(
        db.Float(),
        doc="Mean lenth of the reads",
    )
    # Each GenomicFile gains a `sequencing_experiment` backref.
    genomic_files = db.relationship(
        GenomicFile,
        backref=db.backref("sequencing_experiment", lazy=True),
    )
    sequencing_center_id = db.Column(
        KfId(),
        db.ForeignKey("sequencing_center.kf_id"),
        nullable=False,
        doc="The kf_id of the sequencing center",
    )
 class TestModel(Base, db.Model):
     """
     Minimal model built on Base with a single string column.
     """
     # NOTE(review): presumably a throwaway fixture for exercising Base (the
     # name suggests so) -- confirm against the enclosing test code.
     # The bases are listed as (Base, db.Model), the reverse of the order
     # used by the entity models in this file.
     __prefix__ = 'TT'
     field = db.Column(db.String)
class FamilyRelationship(db.Model, Base):
    """
    Represents a relationship between two family members.

    The relationship table represents a directed graph. One or more
    relationships may exist between any two participants.
    (P1 -> P2 is different than P2 -> P1)

    A row is unique on the combination of both participant ids and both
    relation labels.

    :param kf_id: Primary key given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to family_relationship by contributor
    :param participant1_id: Kids first id of the first Participant in the
        relationship (required)
    :param participant2_id: Kids first id of the second Participant
        in the relationship (required)
    :param participant1_to_participant2_relation: Text describing what
        participant 1 is to participant 2 (required)
    :param participant2_to_participant1_relation: Text describing what
        participant 2 is to participant 1 (optional)
    :param source_text_notes: Text notes from source describing the
        relationship
    """
    __tablename__ = 'family_relationship'
    __prefix__ = 'FR'
    # Uniqueness covers the participant pair plus both relation labels.
    __table_args__ = (db.UniqueConstraint(
        'participant1_id', 'participant2_id',
        'participant1_to_participant2_relation',
        'participant2_to_participant1_relation'), )
    external_id = db.Column(db.Text(), doc='external id used by contributor')
    participant1_id = db.Column(
        KfId(),
        db.ForeignKey('participant.kf_id'),
        nullable=False,
        doc='kf_id of one participant in the relationship')

    participant2_id = db.Column(
        KfId(),
        db.ForeignKey('participant.kf_id'),
        nullable=False,
        doc='kf_id of the other participant in the relationship')

    participant1_to_participant2_relation = db.Column(db.Text(),
                                                      nullable=False)

    participant2_to_participant1_relation = db.Column(db.Text())
    source_text_notes = db.Column(db.Text(),
                                  doc='Text notes from source describing '
                                  'the relationship')

    # Two foreign keys point at participant, so each relationship names its
    # join condition explicitly. Participant gains
    # `outgoing_family_relationships` (rows where it is participant1) and
    # `incoming_family_relationships` (rows where it is participant2).
    participant1 = db.relationship(
        Participant,
        primaryjoin=participant1_id == Participant.kf_id,
        backref=db.backref('outgoing_family_relationships',
                           cascade='all, delete-orphan'))

    participant2 = db.relationship(
        Participant,
        primaryjoin=participant2_id == Participant.kf_id,
        backref=db.backref('incoming_family_relationships',
                           cascade='all, delete-orphan'))

    @classmethod
    def query_all_relationships(cls,
                                participant_kf_id=None,
                                model_filter_params=None):
        """
        Find all family relationships for a participant

        :param participant_kf_id: Kids First ID of the participant. When
            omitted (or otherwise falsy) no participant-based filter is
            applied.
        :param model_filter_params: Optional dict of column filters passed
            to ``filter_by``; ``None`` means no extra filters.
        :returns: The constructed query object; it is not executed here.

        Given a participant's kf_id, return all of the biological
        family relationships of the participant and the relationships
        of the participant's family members.

        If the participant does not have a family defined (or no participant
        with that kf_id is found), then return all of the immediate/direct
        family relationships of the participant.
        """
        # Apply model property filter params
        if model_filter_params is None:
            model_filter_params = {}
        q = FamilyRelationship.query.filter_by(**model_filter_params)

        # Get family relationships and join with participants on either
        # side of the relationship
        q = q.join(
            Participant,
            or_(FamilyRelationship.participant1,
                FamilyRelationship.participant2))

        # Only look the participant up when an id was supplied, because
        # query.get() errors out if passed None
        if participant_kf_id:
            pt = Participant.query.get(participant_kf_id)
            family_id = pt.family_id if pt else None

            # Use family to get all family relationships in participants family
            if family_id:
                q = q.filter(Participant.family_id == family_id)

            # No family provided, use just family relationships
            # to get only immediate family relationships for participant
            else:
                q = q.filter(
                    or_(
                        FamilyRelationship.participant1_id ==
                        participant_kf_id, FamilyRelationship.participant2_id
                        == participant_kf_id))

        # The join can match a relationship once per participant side, so
        # group on the primary key to return each relationship only once
        q = q.group_by(FamilyRelationship.kf_id)

        return q

    def __repr__(self):
        # Reads kf_id through the related participant objects, so both
        # relationships must be populated for this to succeed.
        return '<{} is {} of {}>'.format(
            self.participant1.kf_id,
            self.participant1_to_participant2_relation,
            self.participant2.kf_id)
class Diagnosis(db.Model, Base):
    """
    Diagnosis entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    :param external_id: Name given to diagnosis by contributor
    :param source_text_diagnosis: Diagnosis of the participant
    :param diagnosis_category: High level diagnosis categorization
    :param source_text_tumor_location: Location of the tumor
    :param age_at_event_days: Age at the time of diagnosis expressed
           in number of days since birth
    :param mondo_id_diagnosis: The ID of the term from the Monarch Disease
           Ontology which represents a harmonized diagnosis
    :param icd_id_diagnosis: The ID of the term from the International
           Classification of Diseases which represents a harmonized diagnosis
    :param uberon_id_tumor_location: The ID of the term from Uber-anatomy
           ontology which represents harmonized anatomical ontologies
    :param ncit_id_diagnosis: The ID term from the National Cancer Institute
           Thesaurus which represents a harmonized diagnosis
    :param spatial_descriptor: Ontology term that harmonizes the spatial
           concepts from Biological Spatial Ontology
    :param participant_id: kf_id of the participant who was diagnosed
           (required)
    """
    __tablename__ = 'diagnosis'
    __prefix__ = 'DG'

    external_id = db.Column(db.Text(), doc='external id used by contributor')
    source_text_diagnosis = db.Column(db.Text(),
                                      doc='the pathological diagnosis')
    diagnosis_category = db.Column(db.Text(),
                                   doc='High level diagnosis categorization')
    source_text_tumor_location = db.Column(db.Text(),
                                           doc='location of the tumor')
    age_at_event_days = db.Column(db.Integer(),
                                  doc='Age at the time of event occurred in '
                                  'number of days since birth')
    # "Monary" was a misspelling of the ontology's name (MONDO).
    mondo_id_diagnosis = db.Column(db.Text(),
                                   doc='The ID of the term from the Monarch '
                                   'Disease Ontology which represents a'
                                   ' harmonized diagnosis')
    icd_id_diagnosis = db.Column(db.Text(),
                                 doc='The ID of the term from the'
                                 ' International Classification of Diseases'
                                 ' which represents harmonized diagnosis')
    uberon_id_tumor_location = db.Column(db.Text(),
                                         doc='The ID of the term from Uber '
                                         'anatomy ontology which represents'
                                         ' harmonized anatomical ontologies')
    ncit_id_diagnosis = db.Column(db.Text(),
                                  doc='The ID term from the National Cancer'
                                  ' Institute Thesaurus which represents a'
                                  ' harmonized diagnosis')
    # Adjacent literals are concatenated, so the first piece must end with a
    # space (previously rendered as "...harmonizes thespatial concepts...").
    spatial_descriptor = db.Column(db.Text(),
                                   doc='Ontology term that harmonizes the '
                                   'spatial concepts from Biological Spatial'
                                   ' Ontology')
    participant_id = db.Column(KfId(),
                               db.ForeignKey('participant.kf_id'),
                               doc='the participant who was diagnosed',
                               nullable=False)
class Biospecimen(db.Model, Base):
    """
    Biospecimen entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_sample_id: Name given to sample by contributor
    :param external_aliquot_id: Name given to aliquot by contributor
    :param composition: The cellular composition of the biospecimen.
    :param source_text_tissue_type: description of the kind of tissue collected
           with respect to disease status or proximity to tumor tissue
    :param source_text_anatomical_site: The name of the primary disease site
           of the submitted tumor biospecimen
    :param age_at_event_days: Age at the time biospecimen was
           acquired, expressed in number of days since birth
    :param source_text_tumor_descriptor: The kind of disease present in the
           tumor specimen as related to a specific timepoint
    :param shipment_origin: The origin of the shipment
    :param analyte_type: Text term that represents the kind of molecular
           specimen analyte (required, the column is not nullable)
    :param concentration_mg_per_ml: The concentration of an analyte or aliquot
           extracted from the biospecimen or biospecimen portion, measured in
           milligrams per milliliter
    :param volume_ul: The volume in microliters (ul) of the aliquots derived
           from the analyte(s) shipped for sequencing and characterization
    :param shipment_date: The date item was shipped in YYYY-MM-DD format
    :param uberon_id_anatomical_site: The ID of the term from Uber-anatomy
           ontology which represents harmonized anatomical ontologies
    :param ncit_id_tissue_type: The ID term from the National Cancer Institute
           Thesaurus which represents a harmonized tissue_type
    :param ncit_id_anatomical_site: The ID term from the National Cancer
           Institute Thesaurus which represents a harmonized anatomical_site
    :param spatial_descriptor: Ontology term that harmonizes the spatial
           concepts from Biological Spatial Ontology
    :param method_of_sample_procurement: The method used to procure the
           sample used to extract analyte(s)
    :param participant_id: kf_id of the participant the biospecimen came
           from (required foreign key)
    :param sequencing_center_id: kf_id of the sequencing center (required
           foreign key)
    :param consent_type: Short name of consent
    :param dbgap_consent_code: Consent classification code from dbgap
    """

    __tablename__ = 'biospecimen'
    __prefix__ = 'BS'

    external_sample_id = db.Column(db.Text(),
                                   doc='Name given to sample by contributor')
    external_aliquot_id = db.Column(db.Text(),
                                    doc='Name given to aliquot by contributor')
    source_text_tissue_type = db.Column(db.Text(),
                                        doc='Description of the kind of '
                                        'biospecimen collected')
    composition = db.Column(db.Text(),
                            doc='The cellular composition of the biospecimen')
    source_text_anatomical_site = db.Column(db.Text(),
                                            doc='The anatomical location of '
                                            'collection')
    age_at_event_days = db.Column(db.Integer(),
                                  doc='Age at the time of event occurred in '
                                  'number of days since birth.')
    source_text_tumor_descriptor = db.Column(db.Text(),
                                             doc='Disease present in the '
                                             'biospecimen')
    shipment_origin = db.Column(db.Text(),
                                doc='The original site of the aliquot')
    analyte_type = db.Column(db.Text(),
                             nullable=False,
                             doc='The molecular description of the aliquot')
    concentration_mg_per_ml = db.Column(db.Float(),
                                        doc='The concentration of the aliquot')
    volume_ul = db.Column(db.Float(), doc='The volume of the aliquot')
    shipment_date = db.Column(db.DateTime(),
                              doc='The date the aliquot was shipped')
    uberon_id_anatomical_site = db.Column(db.Text(),
                                          doc='The ID of the term from '
                                          'Uber-anatomy ontology which '
                                          'represents harmonized anatomical'
                                          ' ontologies')
    # The adjacent string literals below are concatenated by the parser, so
    # every fragment boundary must carry its own separating space.
    ncit_id_tissue_type = db.Column(db.Text(),
                                    doc='The ID term from the National Cancer '
                                    'Institute Thesaurus which represents a '
                                    'harmonized tissue_type')
    ncit_id_anatomical_site = db.Column(db.Text(),
                                        doc='The ID term from the National '
                                        'Cancer Institute Thesaurus which '
                                        'represents a harmonized'
                                        ' anatomical_site')
    spatial_descriptor = db.Column(db.Text(),
                                   doc='Ontology term that harmonizes the '
                                   'spatial concepts from Biological Spatial'
                                   ' Ontology')
    method_of_sample_procurement = db.Column(db.Text(),
                                             doc='The method used to procure '
                                             'the sample used to extract '
                                             'analyte(s)')
    participant_id = db.Column(KfId(),
                               db.ForeignKey('participant.kf_id'),
                               nullable=False,
                               doc='The kf_id of the biospecimen\'s donor')
    sequencing_center_id = db.Column(KfId(),
                                     db.ForeignKey('sequencing_center.kf_id'),
                                     nullable=False,
                                     doc='The kf_id of the sequencing center')
    consent_type = db.Column(db.Text(), doc='Short name of consent')
    dbgap_consent_code = db.Column(
        db.Text(), doc='Consent classification code from dbgap')

    # Proxy over the 'genomic_file' attribute of the association rows in
    # 'biospecimen_genomic_files'; the creator wraps a value added through
    # the proxy in a new BiospecimenGenomicFile row.
    genomic_files = association_proxy(
        'biospecimen_genomic_files',
        'genomic_file',
        creator=lambda genomic_file: BiospecimenGenomicFile(
            genomic_file=genomic_file))

    # Same pattern for diagnoses, through the 'biospecimen_diagnoses'
    # attribute (not defined in this class body).
    diagnoses = association_proxy(
        'biospecimen_diagnoses',
        'diagnosis',
        creator=lambda dg: BiospecimenDiagnosis(diagnosis=dg))

    # Association rows are owned by the biospecimen: they are cascaded on
    # every operation and deleted once orphaned.
    biospecimen_genomic_files = db.relationship(BiospecimenGenomicFile,
                                                backref='biospecimen',
                                                cascade='all, delete-orphan')
 def kf_id(cls):
     """
     Build the primary key column holding the Kids First id.

     The column default is whatever kf_id_generator returns when called
     with the class's __prefix__.

     :param cls: class providing the __prefix__ attribute
     :returns: the primary key db.Column
     """
     return db.Column(KfId(),
                      primary_key=True,
                      doc="ID assigned by Kids First",
                      default=kf_id_generator(cls.__prefix__))
class IndexdFile:
    """
    Field reflection for objects that are stored in indexd

    # Creation

    When an indexd file is created, an instance of the orm model here is
    created, and when persisted to the database, a request is sent to Gen3
    indexd to register the file in the service. Upon successful registry
    of the file, a response containing a did (digital identifier) will be
    received. The IndexdFile will then be inserted into the database using
    the baseid as its uuid.

    # Update

    When a file is updated in indexd, a new version with a new did is created.
    The document still shares a base_id with the older versions, but a document
    may not be retrieved with the base_id alone. Because of this, the
    latest_did is stored on the file.

    # Deletion

    A file deleted through a DELETE on the dataservice api will immediately
    delete that file from the dataservice's database, as well as send
    a corresponding DELETE to the indexd service.
    Though it should not occur, a file deleted through indexd will remain
    in the dataservice's database until a retrieval is attempted.
    If indexd returns a not found error, the dataservice will automatically
    remove that file from the database, giving the appearance that the two
    are in sync from the viewpoint of the dataservice API.
    """
    # Store the latest did in the database
    # files in indexd cannot be looked up by their baseid
    latest_did = db.Column(UUID(), nullable=False)

    # Class-level defaults for the fields that live in indexd rather than in
    # the database. constructor() rebinds each of them on the instance.
    # NOTE(review): the list/dict defaults here are shared by any instance on
    # which constructor() has not run - confirm that creation code elsewhere
    # assigns fresh containers before mutating them.
    file_name = ''
    urls = []
    rev = None
    hashes = {}
    acl = []
    # The metadata property is already used by sqlalchemy
    _metadata = {}
    size = None

    @reconstructor
    def constructor(self):
        """
        Builds the object by initializing properties and updating them
        from indexd.

        Registered with sqlalchemy's ``reconstructor`` decorator.
        """
        # Fields used by indexd, but not tracked in the database
        self.file_name = ''
        self.urls = []
        self.rev = None
        self.hashes = {}
        self.acl = []
        # The metadata property is already used by sqlalchemy
        self._metadata = {}
        self.size = None
        # Update fields from indexd
        self.merge_indexd()

    def merge_indexd(self):
        """
        Fetch this object's document from indexd.

        If indexd raises RecordNotFound, the object is flagged with
        ``was_deleted = True``, deleted from the database session and the
        session is committed.

        :returns: The result of ``indexd.get(self)`` when the lookup
            succeeds, otherwise None
        """
        try:
            return indexd.get(self)
        except RecordNotFound:
            # The record is gone from indexd: flag the instance and drop the
            # local row so that the two stores agree.
            self.was_deleted = True
            db.session.delete(self)
            db.session.commit()
            return None
class Participant(db.Model, Base):
    """
    Participant entity.

    :param kf_id: Unique id given by the Kid's First DCC
    :param external_id: Name given to participant by contributor
    :param family_id: Id for the participants grouped by family
    :param is_proband: Denotes whether participant is proband of study
    :param race: Race of participant
    :param ethnicity: Ethnicity of participant
    :param gender: Self reported gender of participant
    :param affected_status: Denotes whether participant is affected
    :param diagnosis_category: High level diagnosis categorization
    :param species: The species of the research participant
    :param study_id: kf_id of the study (required foreign key)
    :param alias_group_id: kf_id of the participant's alias group, if any
    :param created_at: Time of object creation
    :param modified_at: Last time of object modification
    """
    __tablename__ = 'participant'
    __prefix__ = 'PT'

    external_id = db.Column(db.Text(), doc='ID used by external study')
    family_id = db.Column(KfId(),
                          db.ForeignKey('family.kf_id'),
                          nullable=True,
                          doc='Id for the participants grouped by family')
    is_proband = db.Column(
        db.Boolean(), doc='Denotes whether participant is proband of study')
    race = db.Column(db.Text(), doc='The race of the participant')
    ethnicity = db.Column(db.Text(), doc='The ethnicity of the participant')
    gender = db.Column(db.Text(), doc='The gender of the participant')
    affected_status = db.Column(db.Boolean(),
                                doc='Denotes whether participant is affected')
    diagnosis_category = db.Column(db.Text(),
                                   doc='High level diagnosis categorization')
    species = db.Column(db.Text(),
                        default='Homo sapiens',
                        doc='The species of the research particpant')

    # Child entities are owned by the participant: they are cascaded on every
    # operation and deleted once orphaned.
    diagnoses = db.relationship(Diagnosis,
                                cascade='all, delete-orphan',
                                backref=db.backref('participant', lazy=True))
    biospecimens = db.relationship(Biospecimen,
                                   backref='participant',
                                   cascade='all, delete-orphan')
    outcomes = db.relationship(Outcome,
                               cascade='all, delete-orphan',
                               backref=db.backref('participant', lazy=True))
    phenotypes = db.relationship(Phenotype,
                                 cascade='all, delete-orphan',
                                 backref=db.backref('participant', lazy=True))

    study_id = db.Column(KfId(), db.ForeignKey('study.kf_id'), nullable=False)

    alias_group_id = db.Column(KfId(), db.ForeignKey('alias_group.kf_id'))

    def add_alias(self, pt):
        """
        A convenience method to make participant 'pt'
        an alias of participant 'self'.

        There are 5 cases to consider:

        1) Participant pt and self have not been assigned an alias
        group. Create a new alias group and add both participants to it.
        2) Participant pt does not have an alias group, but participant self
        does. Add pt to self's alias group.
        3) Participant self does not have an alias group but participant pt
        does. Add self to pt's alias group.
        4) Both participants already share the same alias group. Nothing to
        do.
        5) The participants belong to two different alias groups. Find which
        participant has the smaller alias group, merge all participants in
        the smaller group into the larger group and delete the smaller group.

        ** NOTE ** A participant's aliases can also be created manually by
        direct manipulation of the participants in an AliasGroup or
        the participant's alias_group_id. However, then it is completely up to
        the user to ensure all aliases are in the right group and there aren't
        redundant groups that exist.

        :param pt: the participant to alias with this one
        """
        own_group = self.alias_group
        other_group = pt.alias_group

        # Neither participant has an alias group yet
        if (not other_group) and (not own_group):
            g = AliasGroup()
            g.participants.extend([self, pt])

        # Self belongs to alias group, pt does not
        elif (not other_group) and own_group:
            own_group.participants.append(pt)

        # pt belongs to an alias group, self does not
        elif other_group and (not own_group):
            other_group.participants.append(self)

        # Already aliases of each other. Without this guard the merge below
        # would end by deleting the very group both participants are in.
        elif own_group is other_group:
            return

        # The participants belong to two different alias groups
        else:
            # Find smaller alias group first
            c1 = (Participant.query.filter_by(
                alias_group_id=self.alias_group_id).count())
            c2 = (Participant.query.filter_by(
                alias_group_id=pt.alias_group_id).count())

            # On a tie self's group is kept and pt's group is merged into it
            if c2 <= c1:
                larger_alias_group = own_group
                smaller_alias_group = other_group
            else:
                larger_alias_group = other_group
                smaller_alias_group = own_group

            # Merge smaller alias group with larger alias group
            # aka, change all participants' alias_group_id in the smaller group
            # to be the alias_group_id of the larger group
            for p in (db.session.query(Participant).filter(
                    Participant.alias_group_id == smaller_alias_group.kf_id)):
                p.alias_group = larger_alias_group

            # Delete old alias group
            db.session.delete(smaller_alias_group)

    @property
    def aliases(self):
        """
        Retrieve aliases of participant

        :returns: list of all other participants with the same alias group
            id, or an empty list if this participant has no alias group
        """
        if not self.alias_group:
            return []

        return Participant.query.filter(
            and_(Participant.alias_group_id == self.alias_group_id,
                 Participant.kf_id != self.kf_id)).all()

    def __repr__(self):
        return '<Participant {}>'.format(self.kf_id)