Exemple #1
0
class ClinicalData(db.Document):
    """Clinical record of a patient inside a subproject.

    Lookup path: project --> subproject --> patient. The same patient is not
    expected to appear twice in the same subproject.

    NOTE(review): no unique index is declared on this document, so that rule
    is presumably enforced by the code that writes these records -- confirm.
    """
    age = db.FloatField(min_value=0.001, max_value=1200)  # unit: months
    # NOTE(review): the name suggests an age in years, but the bounds look
    # like a calendar year -- confirm which one is actually stored.
    age_began_smoking_in_years = db.IntField(min_value=1900, max_value=2999)
    # 0: no drinking history, 1: has drinking history
    alcohol_history_documented = db.BooleanField()
    amount_of_alcohol_comsumption_per_day = db.FloatField(min_value=0.01,
                                                          max_value=100)
    # The upper bound must be passed as ``max_value``; ``max`` is not a
    # validation keyword of IntField and would leave the limit unenforced.
    frequency_of_alcohol_consumption = db.IntField(min_value=1, max_value=7)
    gender = db.BooleanField()  # 0: female, 1: male
    height = db.FloatField(min_value=30, max_value=300)
    lymph_node_examined_count = db.FloatField(min_value=0.1, max_value=100)
    number_of_lymphnodes_position_by_he = db.FloatField(min_value=0.1,
                                                        max_value=100)
    number_of_lymphnodes_position_by_ihc = db.FloatField(min_value=0.1,
                                                         max_value=100)
    number_pack_years_smoked = db.FloatField(min_value=0.01, max_value=100)
    # presumably overall-survival time and its event indicator -- TODO confirm
    OS = db.FloatField(min_value=0.01, max_value=1200)
    OS_IND = db.BooleanField()
    patient_id = db.StringField(max_length=50, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    project_name = db.StringField(max_length=30, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    # presumably recurrence-free-survival time and its event indicator
    # -- TODO confirm
    RFS = db.FloatField(min_value=0.01, max_value=1200)
    RFS_IND = db.BooleanField()
    stopped_smoking_year = db.IntField(min_value=1900, max_value=2999)
    tobacco_smoking_history = db.BooleanField()
    weight = db.FloatField(min_value=1, max_value=500)
Exemple #2
0
class Project(db.Document):
    """Metadata record for a project.

    * ``data_file_md5_lst`` is a list because several raw data files may
      have to be imported for one project.
    * ``src_project_id_ref_lst`` is a list because the data of the current
      project may come from several source projects.
    * A source project corresponds to a subproject. This is used when
      several projects are aggregated into one; every aggregation means the
      data went through some kind of processing.
    """
    # --- provenance -------------------------------------------------------
    analysis_pipeline_ref = db.ReferenceField(AnalysisProgram, required=True)
    created_date = db.DateTimeField(required=True)

    # --- imported raw files -----------------------------------------------
    data_file_md5_lst = db.ListField(
        db.StringField(max_length=32, required=True),
        required=True,
    )
    data_file_name_lst = db.ListField(
        db.StringField(max_length=255, required=True),
        required=True,
    )

    # --- description of the data ------------------------------------------
    data_type = db.StringField(max_length=10, required=True)
    description = db.StringField(max_length=512)
    import_data_program_ref = db.ReferenceField(AnalysisProgram, required=True)
    normalized = db.BooleanField()
    normalized_method = db.StringField(max_length=10, required=True)
    num_of_samples = db.IntField(required=True)
    project_name = db.StringField(max_length=30, required=True)

    # --- source projects (self-reference, resolved by name) ---------------
    src_project_id_ref_lst = db.ListField(
        db.ReferenceField("Project", required=True),
        required=True,
    )
    src_project_name_lst = db.ListField(
        db.StringField(max_length=30, required=True),
        required=True,
    )

    url = db.StringField(max_length=100)
    version = db.StringField(max_length=10, required=True)
Exemple #3
0
class TranscriptExpr(db.Document):
    """Expression values of one transcript.

    Lookup path: project --> subproject --> transcript. The subproject
    narrows the search scope; within a subproject a transcript is uniquely
    determined.
    """
    clinical_data_id_lst = db.ListField(
        db.ReferenceField("ClinicalData", required=True),
        required=True,
    )
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField("PhenotypeData", required=True),
        required=True,
    )
    project_name = db.StringField(max_length=30, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField("SampleData", required=True),
        required=True,
    )
    source_type = db.StringField(max_length=50, required=True)
    species = db.StringField(max_length=10, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    transcript_ensembl_id = db.StringField(max_length=50, required=True)
Exemple #4
0
class GeneExpr(db.Document):
    """Expression values of one gene.

    Lookup path: project --> subproject --> gene. The subproject narrows the
    search scope; within a subproject a gene is uniquely determined.

    Performance trap: sample ids, clinical-data ids and the like may only be
    used to index into the expression values. They must not be used to look
    up a gene, otherwise queries become slow. In other words, a gene or
    transcript is identified by project/subproject, not by sample or patient.
    """
    clinical_data_id_lst = db.ListField(
        db.ReferenceField("ClinicalData", required=True),
        required=True,
    )
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    gene_ensembl_id = db.StringField(max_length=50, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    project_name = db.StringField(max_length=30, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField("SampleData", required=True),
        required=True,
    )
    species = db.StringField(max_length=10, required=True)
    source_type = db.StringField(max_length=50, required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField("PhenotypeData", required=True),
        required=True,
    )
Exemple #5
0
class AnalysisProgram(db.Document):
    """Descriptor of a program; every field except ``description`` is required.

    ``Project`` references this document through its
    ``analysis_pipeline_ref`` and ``import_data_program_ref`` fields.
    """
    created_author = db.StringField(max_length=20, required=True)
    created_date = db.DateTimeField(required=True)
    description = db.StringField(max_length=500)  # optional
    document_file = db.StringField(max_length=255, required=True)
    md5 = db.StringField(max_length=32, required=True)
    path = db.StringField(max_length=255, required=True)
    program_name = db.StringField(max_length=100, required=True)
Exemple #6
0
class SampleData(db.Document):
    """Record of a sample inside a subproject.

    Lookup path: project --> subproject --> sample. The same sample is not
    expected to appear twice in the same subproject.

    NOTE(review): no unique index is declared on this document, so that rule
    is presumably enforced by the code that writes these records -- confirm.
    """
    clinical_data_id_ref = db.ReferenceField(ClinicalData, required=True)
    concentration = db.FloatField(min_value=0.001, max_value=1000)
    ERCC = db.StringField(max_length=10)
    FFPE = db.BooleanField()
    histological_type = db.StringField(max_length=30, required=True)
    od260_280 = db.FloatField(min_value=0.001, max_value=1000)
    primary_site = db.StringField(max_length=30, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    project_name = db.StringField(max_length=30, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    reads = db.FloatField(min_value=0.01, max_value=1000)
    rin = db.FloatField(min_value=0.001, max_value=1000)
    sample_id = db.StringField(max_length=50, required=True)
    source = db.StringField(max_length=30, required=True)
    species = db.StringField(max_length=10, required=True)
    # The two subtype fields are optional.
    tissue_histological_subtype = db.StringField(max_length=30)
    tissue_molecular_subtype = db.StringField(max_length=30)
    volume = db.FloatField(min_value=0.001, max_value=1000)
    weight = db.FloatField(min_value=0.001, max_value=1000)
    yields = db.FloatField(min_value=0.01, max_value=1000)
Exemple #7
0
class ExprInfo(db.Document):
    """Temporary resource used for queries that involve multiple ids.

    Only ``expr_value_lst`` and ``source_type_lst`` are declared as required
    lists; the remaining list fields are not marked required.
    """
    clinical_data_id_lst = db.ListField(
        db.ReferenceField("ClinicalData", required=True),
    )
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    transcript_ensembl_id_lst = db.ListField(
        db.StringField(max_length=50, required=True),
    )
    gene_ensembl_id_lst = db.ListField(
        db.StringField(max_length=50, required=True),
    )
    project_ref = db.ReferenceField(Project, required=True)
    subproject_name_lst = db.ListField(
        db.StringField(max_length=30, required=True),
    )
    project_name = db.StringField(max_length=30, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField("SampleData", required=True),
    )
    species_lst = db.ListField(
        db.StringField(max_length=10, required=True),
    )
    source_type_lst = db.ListField(
        db.StringField(max_length=50, required=True),
        required=True,
    )
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField("PhenotypeData", required=True),
    )
    query_condition_md5 = db.StringField(max_length=128, required=True)
Exemple #8
0
class Mutation(db.Document):
    """Mutation record; every field declared here is required.

    Positions, alleles and classification values are all stored as strings.
    """
    chromosome = db.StringField(max_length=10, required=True)
    clinical_data_id_lst = db.ListField(
        db.ReferenceField("ClinicalData", required=True),
        required=True,
    )
    dbsnp_rs = db.StringField(max_length=50, required=True)
    end_position = db.StringField(max_length=50, required=True)
    gene_ensembl_id = db.StringField(max_length=50, required=True)
    transcript_ensembl_id = db.StringField(max_length=50, required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField("PhenotypeData", required=True),
        required=True,
    )
    project_name = db.StringField(max_length=30, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    protein_change = db.StringField(max_length=50, required=True)
    reference_allele = db.StringField(max_length=50, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField("SampleData", required=True),
        required=True,
    )
    source_type = db.StringField(max_length=50, required=True)
    species = db.StringField(max_length=10, required=True)
    start_position = db.StringField(max_length=50, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    transcript_strand = db.StringField(max_length=50, required=True)
    tumor_sample_barcode = db.StringField(max_length=50, required=True)
    tumor_seq_allele1 = db.StringField(max_length=50, required=True)
    tumor_seq_allele2 = db.StringField(max_length=50, required=True)
    variant_classification = db.StringField(max_length=50, required=True)
    variant_type = db.StringField(max_length=50, required=True)