Beispiel #1
0
class Project(db.Document):
    """Schema of a project record together with its imported data files.

    Notes:
        data_file_md5_lst: several raw data files may have to be imported
            for a single project, which is why this is a list.
        src_project_id_ref_lst: the data of the current project may come
            from several original projects, which is why this is a list.

        A source project corresponds to a subproject. It is used when
        several projects are aggregated into one project; every
        aggregation means the data went through some kind of processing.
    """

    analysis_pipeline_ref = db.ReferenceField(AnalysisProgram, required=True)
    created_date = db.DateTimeField(required=True)

    # One entry per imported raw data file.
    # NOTE(review): presumably index-aligned with data_file_name_lst - confirm.
    data_file_md5_lst = db.ListField(
        db.StringField(max_length=32, required=True),
        required=True,
    )
    data_file_name_lst = db.ListField(
        db.StringField(max_length=255, required=True),
        required=True,
    )

    data_type = db.StringField(max_length=10, required=True)
    description = db.StringField(max_length=512)
    import_data_program_ref = db.ReferenceField(AnalysisProgram, required=True)

    # The flag is optional while the method name is mandatory.
    normalized = db.BooleanField()
    normalized_method = db.StringField(max_length=10, required=True)

    num_of_samples = db.IntField(required=True)
    project_name = db.StringField(max_length=30, required=True)

    # Self-references to the projects this project's data came from.
    src_project_id_ref_lst = db.ListField(
        db.ReferenceField('Project', required=True),
        required=True,
    )
    src_project_name_lst = db.ListField(
        db.StringField(max_length=30, required=True),
        required=True,
    )

    url = db.StringField(max_length=100)
    version = db.StringField(max_length=10, required=True)
Beispiel #2
0
class Mutation(db.Document):
    """Schema of one mutation record.

    Stores the genomic location, the alleles and the variant annotation as
    strings, plus references to the owning ``Project`` and to the related
    ``ClinicalData``, ``PhenotypeData`` and ``SampleData`` documents.
    Every field is declared with ``required=True``.
    """

    chromosome = db.StringField(max_length=10, required=True)
    clinical_data_id_lst = db.ListField(
        db.ReferenceField('ClinicalData', required=True),
        required=True,
    )
    dbsnp_rs = db.StringField(max_length=50, required=True)
    # Positions are stored as strings, not as integers.
    end_position = db.StringField(max_length=50, required=True)
    gene_ensembl_id = db.StringField(max_length=50, required=True)
    transcript_ensembl_id = db.StringField(max_length=50, required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField('PhenotypeData', required=True),
        required=True,
    )
    # The project is stored both by name and as a reference.
    project_name = db.StringField(max_length=30, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    protein_change = db.StringField(max_length=50, required=True)
    reference_allele = db.StringField(max_length=50, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField('SampleData', required=True),
        required=True,
    )
    source_type = db.StringField(max_length=50, required=True)
    species = db.StringField(max_length=10, required=True)
    start_position = db.StringField(max_length=50, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    transcript_strand = db.StringField(max_length=50, required=True)
    tumor_sample_barcode = db.StringField(max_length=50, required=True)
    tumor_seq_allele1 = db.StringField(max_length=50, required=True)
    tumor_seq_allele2 = db.StringField(max_length=50, required=True)
    variant_classification = db.StringField(max_length=50, required=True)
    variant_type = db.StringField(max_length=50, required=True)
Beispiel #3
0
class TranscriptExpr(db.Document):
    """Schema of the expression values recorded for one transcript.

    Indexing scheme: project --> subproject --> transcript. The subproject
    narrows the search scope; within one subproject a transcript is
    uniquely determined.
    """

    clinical_data_id_lst = db.ListField(
        db.ReferenceField('ClinicalData', required=True),
        required=True,
    )
    # NOTE(review): presumably one value per referenced sample - confirm.
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField('PhenotypeData', required=True),
        required=True,
    )
    # The project is stored both by name and as a reference.
    project_name = db.StringField(max_length=30, required=True)
    project_ref = db.ReferenceField(Project, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField('SampleData', required=True),
        required=True,
    )
    source_type = db.StringField(max_length=50, required=True)
    species = db.StringField(max_length=10, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    transcript_ensembl_id = db.StringField(max_length=50, required=True)
Beispiel #4
0
class GeneExpr(db.Document):
    """Schema of the expression values recorded for one gene.

    Indexing scheme: project --> subproject --> gene. The subproject
    narrows the search scope; within one subproject a gene is uniquely
    determined.

    Performance pitfall: sample ids, clinical data ids and the like may
    only be used to index into the expression values. They must not be
    used to look up the gene itself, otherwise performance problems
    follow. In other words, a gene or transcript is uniquely identified
    by project and subproject, not by sample or patient.
    """

    clinical_data_id_lst = db.ListField(
        db.ReferenceField('ClinicalData', required=True),
        required=True,
    )
    # NOTE(review): presumably one value per referenced sample - confirm.
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    gene_ensembl_id = db.StringField(max_length=50, required=True)
    # The project is stored both as a reference and by name.
    project_ref = db.ReferenceField(Project, required=True)
    project_name = db.StringField(max_length=30, required=True)
    subproject_name = db.StringField(max_length=30, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField('SampleData', required=True),
        required=True,
    )
    species = db.StringField(max_length=10, required=True)
    source_type = db.StringField(max_length=50, required=True)
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField('PhenotypeData', required=True),
        required=True,
    )
Beispiel #5
0
class ExprInfo(db.Document):
    """Temporary resource used for queries that involve multiple ids.

    Several list fields here carry no list-level ``required=True``; only
    ``expr_value_lst`` and ``source_type_lst`` are required as lists.
    """

    clinical_data_id_lst = db.ListField(
        db.ReferenceField('ClinicalData', required=True),
    )
    expr_value_lst = db.ListField(db.FloatField(), required=True)
    transcript_ensembl_id_lst = db.ListField(
        db.StringField(max_length=50, required=True),
    )
    gene_ensembl_id_lst = db.ListField(
        db.StringField(max_length=50, required=True),
    )
    project_ref = db.ReferenceField(Project, required=True)
    subproject_name_lst = db.ListField(
        db.StringField(max_length=30, required=True),
    )
    project_name = db.StringField(max_length=30, required=True)
    samples_data_id_lst = db.ListField(
        db.ReferenceField('SampleData', required=True),
    )
    species_lst = db.ListField(
        db.StringField(max_length=10, required=True),
    )
    source_type_lst = db.ListField(
        db.StringField(max_length=50, required=True),
        required=True,
    )
    phenotype_data_id_lst = db.ListField(
        db.ReferenceField('PhenotypeData', required=True),
    )
    # NOTE(review): presumably a digest identifying the query that produced
    # this temporary record; the field allows up to 128 characters - confirm.
    query_condition_md5 = db.StringField(max_length=128, required=True)