def parse_individuals(samples):
    """Parse the individual information

    Reformat sample information to proper individuals and verify that
    every configured parent is itself one of the parsed individuals.

    Args:
        samples(list(dict)): Raw sample dictionaries, one per individual

    Returns:
        individuals(list(dict)): One parsed individual per sample, in the
            same order as ``samples``

    Raises:
        PedigreeError: If ``samples`` is empty, or if a father/mother id
            is not the id of any parsed individual
    """
    if not samples:
        raise PedigreeError("No samples could be found")

    # Parse everything first so that parents listed after their children
    # are already known when the relations are checked.
    individuals = [parse_individual(sample_info) for sample_info in samples]
    ind_ids = {parsed_ind['individual_id'] for parsed_ind in individuals}

    # Check if relations are correct
    for parsed_ind in individuals:
        for relation in ('father', 'mother'):
            # Use .get() instead of indexing so that an individual without
            # the parent key is treated as "no parent configured".
            # NOTE(review): some parse_individual variants appear to strip
            # None-valued keys before returning - confirm.
            parent_id = parsed_ind.get(relation)
            # Empty values and '0' both mean that no parent is configured
            if parent_id and parent_id != '0' and parent_id not in ind_ids:
                raise PedigreeError(
                    '%s %s does not exist in family' % (relation, parent_id))

    return individuals
def parse_individual(sample):
    """Validate one raw sample and reshape it into an individual dict

    The sample must carry 'sample_id', 'sex' and 'phenotype'. The sex has
    to be a key of REV_SEX_MAP and the phenotype a key of
    REV_PHENOTYPE_MAP. All other fields are optional.

    Args:
        sample (dict)

    Returns:
        {
            'individual_id': str,
            'display_name': str,   # 'sample_name', falls back to the id
            'sex': str,
            'phenotype': str,
            'father': str,
            'mother': str,
            'bam_file': str,       # read from 'bam_path'
            'analysis_type': str,
            'capture_kits': list(str),
        }

    Raises:
        PedigreeError: If a mandatory key is missing or if sex/phenotype
            holds a value that is not allowed
    """
    if 'sample_id' not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample['sample_id']

    # Validate the sex
    if 'sex' not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample['sex']
    if sex not in REV_SEX_MAP:
        allowed_sexes = ', '.join(REV_SEX_MAP)
        logger.warning(
            "'sex' is only allowed to have values from {}".format(allowed_sexes))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Validate the phenotype
    if 'phenotype' not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample['phenotype']
    if phenotype not in REV_PHENOTYPE_MAP:
        allowed_phenotypes = ', '.join(REV_PHENOTYPE_MAP)
        logger.warning(
            "'phenotype' is only allowed to have values from {}".format(
                allowed_phenotypes))
        raise PedigreeError("Individual %s has wrong formated phenotype" % sample_id)

    # A capture kit is wrapped in a list whenever the key is present
    if 'capture_kit' in sample:
        capture_kits = [sample['capture_kit']]
    else:
        capture_kits = []

    return {
        'individual_id': sample_id,
        'display_name': sample.get('sample_name', sample_id),
        'sex': sex,
        'phenotype': phenotype,
        'father': sample.get('father'),
        'mother': sample.get('mother'),
        'bam_file': sample.get('bam_path'),
        'analysis_type': sample.get('analysis_type'),
        'capture_kits': capture_kits,
    }
def parse_ped(ped_stream, family_type="ped"):
    """Extract the family id and the basic sample records from a PED stream.

    The stream is handed to FamilyParser and must describe exactly one
    family.

    Args:
        ped_stream(iterable(str))
        family_type(str): Format of the pedigree information

    Returns:
        family_id(str), samples(list[dict])

    Raises:
        PedigreeError: If the parsed pedigree does not hold exactly one
            family
    """
    pedigree = FamilyParser(ped_stream, family_type=family_type)
    families = pedigree.families

    if len(families) != 1:
        raise PedigreeError("Only one case per ped file is allowed")

    # Exactly one family is present, so the first key is the only key
    family_id = next(iter(families.keys()))
    family = families[family_id]

    samples = []
    for ind_id, individual in family.individuals.items():
        samples.append(
            {
                "sample_id": ind_id,
                "father": individual.father,
                "mother": individual.mother,
                # Translate the sex code via SEX_MAP
                "sex": SEX_MAP[individual.sex],
                # Translate the integer phenotype code via PHENOTYPE_MAP
                "phenotype": PHENOTYPE_MAP[int(individual.phenotype)],
            }
        )

    return family_id, samples
def family_relations_consistent(cls, individuals):
    """Check family relationships.

    Verifies that at least one individual is configured and that every
    configured parent is also an individual of the family.

    Args:
        cls: Unused; present because the function is written with a
            class-level signature
        individuals: Iterable of objects exposing a ``dict()`` method that
            returns a mapping with an "individual_id" key and optional
            "father" / "mother" keys

    Returns:
        The ``individuals`` argument, unchanged

    Raises:
        PedigreeError: If there are no individuals, or if a father/mother
            id is not the "individual_id" of any individual
    """
    individual_dicts = [individual.dict() for individual in individuals]
    if not individual_dicts:
        raise PedigreeError("No samples could be found")

    # A set gives constant-time membership tests for the parent lookups
    all_ids = {ind_dict["individual_id"] for ind_dict in individual_dicts}

    # Check if relations are correct
    for parsed_ind in individual_dicts:
        for relation in ("father", "mother"):
            parent_id = parsed_ind.get(relation)
            # Empty values and "0" both mean that no parent is configured
            if parent_id and parent_id != "0" and parent_id not in all_ids:
                raise PedigreeError(
                    "%s %s does not exist in family" % (relation, parent_id))

    return individuals
def parse_ped(ped_stream, family_type='ped'):
    """Extract the family id and the basic sample records from a PED stream.

    Args:
        ped_stream: Pedigree input that is handed to FamilyParser
        family_type(str): Format of the pedigree information

    Returns:
        family_id, samples: The id of the single family and a list with
            one dictionary per individual of that family

    Raises:
        PedigreeError: If the parsed pedigree does not hold exactly one
            family
    """
    pedigree = FamilyParser(ped_stream, family_type=family_type)
    families = pedigree.families

    if len(families) != 1:
        raise PedigreeError("Only one case per ped file is allowed")

    # Exactly one family is present, so the first key is the only key
    family_id = next(iter(families.keys()))
    family = families[family_id]

    samples = []
    for ind_id, individual in family.individuals.items():
        sample = {
            'sample_id': ind_id,
            'father': individual.father,
            'mother': individual.mother,
            # Sex and phenotype codes are translated via the lookup maps
            'sex': SEX_MAP[individual.sex],
            'phenotype': PHENOTYPE_MAP[int(individual.phenotype)],
        }
        samples.append(sample)

    return family_id, samples
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str, # REV_SEX_MAP value as a string
            phenotype = int, # REV_PHENOTYPE_MAP value, -9 is stored as 0
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file
            confirmed_sex = copied from ind, None if missing
            confirmed_parent = copied from ind, None if missing
            predicted_ancestry = copied from ind, None if missing
            analysis_type = str, # choices=ANALYSIS_TYPES
        )

    Raises:
        PedigreeError: If individual_id is missing, or if sex, phenotype
            or analysis_type holds a value that is not allowed
    """
    try:
        individual_id = ind['individual_id']
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")

    log.info("Building Individual with id:{0}".format(individual_id))
    ind_obj = dict(individual_id=individual_id)

    # Use individual_id if display_name does not exist
    ind_obj['display_name'] = ind.get('display_name', individual_id)

    sex = ind.get('sex', 'unknown')
    # Convert sex to .ped
    try:
        ind_obj['sex'] = str(REV_SEX_MAP[sex])
    except KeyError:
        raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get('phenotype', 'unknown')
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    if ped_phenotype == -9:
        ped_phenotype = 0
    ind_obj['phenotype'] = ped_phenotype

    ind_obj['father'] = ind.get('father')
    ind_obj['mother'] = ind.get('mother')
    ind_obj['capture_kits'] = ind.get('capture_kits', [])
    ind_obj['bam_file'] = ind.get('bam_file')
    ind_obj['confirmed_sex'] = ind.get('confirmed_sex')
    ind_obj['confirmed_parent'] = ind.get('confirmed_parent')
    ind_obj['predicted_ancestry'] = ind.get('predicted_ancestry')

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get('analysis_type', 'unknown')
    if analysis_type not in ANALYSIS_TYPES:
        # Interpolate the value into the message; passing it as a second
        # constructor argument would leave the '%s' placeholder unformatted.
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj['analysis_type'] = analysis_type

    return ind_obj
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str,
            phenotype = int, # REV_PHENOTYPE_MAP value, -9 is stored as 0
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file,
            mt_bam = str,
            rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
            rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
            tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
            upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
            upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
            vcf2cytosure = str, # Path to CGH file
            analysis_type = str, # choices=ANALYSIS_TYPES
            tissue_type = str, # defaults to 'unknown'
        )

        The keys tmb, msi, tumor_purity and tumor_type are only set when
        they are present in ``ind``. File paths that do not exist on disk
        are stored as None, existing ones are made absolute.

    Raises:
        PedigreeError: if sex is unknown,
        if phenotype is unknown,
        if analysis_type is unknown,
        or missing individual_id
    """
    try:
        individual_id = ind['individual_id']
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")

    log.info("Building Individual with id:{0}".format(individual_id))
    ind_obj = dict(individual_id=individual_id)

    # Use individual_id if display_name does not exist
    ind_obj['display_name'] = ind.get('display_name', individual_id)

    sex = ind.get('sex', 'unknown')
    # Convert sex to .ped
    try:
        # Check if sex is coded as an integer
        int(sex)
        ind_obj['sex'] = str(sex)
    except ValueError:
        try:
            # Sex are numbers in the database
            ind_obj['sex'] = REV_SEX_MAP[sex]
        except KeyError:
            raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get('phenotype', 'unknown')
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    if ped_phenotype == -9:
        ped_phenotype = 0
    ind_obj['phenotype'] = ped_phenotype

    # Fix absolute path for individual bam files (takes care of incomplete
    # path for demo files)
    ind_files = (
        'bam_file',
        'mt_bam',
        'vcf2cytosure',
        'rhocall_bed',
        'rhocall_wig',
        'tiddit_coverage_wig',
        'upd_regions_bed',
        'upd_sites_bed',
    )
    for ind_file in ind_files:
        file_path = ind.get(ind_file)
        if file_path and os.path.exists(file_path):
            ind_obj[ind_file] = os.path.abspath(file_path)
        else:
            # Missing entries and paths that are not on disk become None
            ind_obj[ind_file] = None

    ind_obj['father'] = ind.get('father')
    ind_obj['mother'] = ind.get('mother')
    ind_obj['capture_kits'] = ind.get('capture_kits', [])
    ind_obj['confirmed_sex'] = ind.get('confirmed_sex')
    ind_obj['confirmed_parent'] = ind.get('confirmed_parent')
    ind_obj['predicted_ancestry'] = ind.get('predicted_ancestry')

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get('analysis_type', 'unknown')
    if analysis_type not in ANALYSIS_TYPES:
        # Interpolate the value into the message; passing it as a second
        # constructor argument would leave the '%s' placeholder unformatted.
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj['analysis_type'] = analysis_type

    # These keys are copied only when the input carries them
    for optional_key in ('tmb', 'msi', 'tumor_purity', 'tumor_type'):
        if optional_key in ind:
            ind_obj[optional_key] = ind[optional_key]

    ind_obj['tissue_type'] = ind.get('tissue_type', 'unknown')

    return ind_obj
def parse_individual(sample):
    """Parse individual information

    Validates the mandatory keys ('sample_id', 'sex', 'phenotype') and
    collects the optional ones into a new dictionary, which is passed
    through removeNoneValues before it is returned.

    Args:
        sample (dict)

    Returns:
        {
            'individual_id': str,
            'father': str,
            'mother': str,
            'display_name': str,
            'sex': str,
            'phenotype': str,
            'bam_file': str,
            'mt_bam': str,
            'analysis_type': str,
            'vcf2cytosure': str,
            'capture_kits': list(str),

            'upd_sites_bed': str,
            'upd_regions_bed': str,
            'rhocall_bed': str,
            'rhocall_wig': str,
            'tiddit_coverage_wig': str,

            'confirmed_parent': copied from sample,
            'confirmed_sex': copied from sample,
            'predicted_ancestry': str,

            'is_sma': boolean,
            'is_sma_carrier': boolean,
            'smn1_cn': int,
            'smn2_cn': int,
            'smn2delta78_cn': int,
            'smn_27134_cn': int,

            'tumor_type': str,
            'tmb': str,
            'msi': str,
            'tumor_purity': float,
            'tissue_type': str,
        }

    Raises:
        PedigreeError: If a mandatory key is missing or if sex/phenotype
            holds a value that is not allowed
    """
    ind_info = {}
    if "sample_id" not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample["sample_id"]

    # Check the sex
    if "sex" not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample["sex"]
    if sex not in REV_SEX_MAP:
        LOG.warning("'sex' is only allowed to have values from {}".format(
            ", ".join(list(REV_SEX_MAP.keys()))))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Check the phenotype
    if "phenotype" not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample["phenotype"]
    if phenotype not in REV_PHENOTYPE_MAP:
        LOG.warning(
            "'phenotype' is only allowed to have values from {}".format(
                ", ".join(list(REV_PHENOTYPE_MAP.keys()))))
        raise PedigreeError("Individual %s has wrong formated phenotype" % sample_id)

    ind_info["individual_id"] = sample_id
    ind_info["display_name"] = sample.get("sample_name", sample_id)
    ind_info["sex"] = sex
    ind_info["phenotype"] = phenotype

    ind_info["father"] = sample.get("father")
    ind_info["mother"] = sample.get("mother")

    ind_info["confirmed_parent"] = sample.get("confirmed_parent")
    ind_info["confirmed_sex"] = sample.get("confirmed_sex")
    ind_info["predicted_ancestry"] = sample.get("predicted_ancestry")

    # IGV files these can be bam or cram format
    # The first option with a non-blank value wins; 'bam_file' stays unset
    # when none of the options is filled in.
    for option in ("bam_path", "bam_file", "alignment_path"):
        alignment_path = sample.get(option)
        if alignment_path and alignment_path.strip() != "":
            ind_info["bam_file"] = alignment_path
            break

    # Optional files and flags that are copied as they are (None if absent)
    plain_keys = (
        "rhocall_bed",
        "rhocall_wig",
        "tiddit_coverage_wig",
        "upd_regions_bed",
        "upd_sites_bed",
        "mt_bam",
        "analysis_type",
        # Path to downloadable vcf2cytosure file
        "vcf2cytosure",
        # SMA values
        "is_sma",
        "is_sma_carrier",
        "smn1_cn",
        "smn2_cn",
        "smn2delta78_cn",
        "smn_27134_cn",
    )
    for key in plain_keys:
        ind_info[key] = sample.get(key)

    ind_info["capture_kits"] = (
        [sample.get("capture_kit")] if "capture_kit" in sample else [])

    # Cancer specific values
    ind_info["tumor_type"] = sample.get("tumor_type")
    # tumor_mutational_burden
    ind_info["tmb"] = sample.get("tmb")
    ind_info["msi"] = sample.get("msi")

    tumor_purity = sample.get("tumor_purity")
    # might be a string-formatted fraction, example: 30/90
    if isinstance(tumor_purity, str):
        tumor_purity = float(Fraction(tumor_purity))
    ind_info["tumor_purity"] = tumor_purity

    ind_info["tissue_type"] = sample.get("tissue_type")

    # NOTE(review): removeNoneValues is defined elsewhere; presumably it
    # drops the entries whose value is None - confirm.
    return removeNoneValues(ind_info)
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str,
            phenotype = int, # REV_PHENOTYPE_MAP value, -9 is stored as 0
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file,
            mt_bam = str,
            rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
            rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
            tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
            upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
            upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
            vcf2cytosure = str, # Path to CGH file
            is_sma = boolean,
            is_sma_carrier = boolean,
            smn1_cn = int,
            smn2_cn = int,
            smn2delta78_cn = int,
            smn_27134_cn = int,
            predicted_ancestry = str,
            analysis_type = str, # choices=ANALYSIS_TYPES
            tissue_type = str, # defaults to "unknown"
        )

        The keys tmb, msi, tumor_purity and tumor_type are only set when
        they are present in ``ind``. File paths that do not exist on disk
        are stored as None, existing ones are made absolute.

    Raises:
        PedigreeError: if sex is unknown,
        if phenotype is unknown,
        if analysis_type is unknown,
        or missing individual_id
    """
    try:
        individual_id = ind["individual_id"]
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")

    log.info("Building Individual with id:{0}".format(individual_id))
    ind_obj = dict(individual_id=individual_id)

    # Use individual_id if display_name does not exist
    ind_obj["display_name"] = ind.get("display_name", individual_id)

    sex = ind.get("sex", "unknown")
    # Convert sex to .ped
    try:
        # Check if sex is coded as an integer
        int(sex)
        ind_obj["sex"] = str(sex)
    except ValueError:
        try:
            # Sex are numbers in the database
            ind_obj["sex"] = REV_SEX_MAP[sex]
        except KeyError:
            raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get("phenotype", "unknown")
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    if ped_phenotype == -9:
        ped_phenotype = 0
    ind_obj["phenotype"] = ped_phenotype

    # Fix absolute path for individual bam files (takes care of incomplete
    # path for demo files)
    ind_files = (
        "bam_file",
        "mt_bam",
        "vcf2cytosure",
        "rhocall_bed",
        "rhocall_wig",
        "tiddit_coverage_wig",
        "upd_regions_bed",
        "upd_sites_bed",
    )
    for ind_file in ind_files:
        file_path = ind.get(ind_file)
        if file_path and os.path.exists(file_path):
            ind_obj[ind_file] = os.path.abspath(file_path)
        else:
            # Missing entries and paths that are not on disk become None
            ind_obj[ind_file] = None

    ind_obj["father"] = ind.get("father")
    ind_obj["mother"] = ind.get("mother")
    ind_obj["capture_kits"] = ind.get("capture_kits", [])
    ind_obj["confirmed_sex"] = ind.get("confirmed_sex")
    ind_obj["confirmed_parent"] = ind.get("confirmed_parent")
    ind_obj["predicted_ancestry"] = ind.get("predicted_ancestry")

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get("analysis_type", "unknown")
    if analysis_type not in ANALYSIS_TYPES:
        # Interpolate the value into the message; passing it as a second
        # constructor argument would leave the '%s' placeholder unformatted.
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj["analysis_type"] = analysis_type

    # These keys are copied only when the input carries them
    for optional_key in ("tmb", "msi", "tumor_purity", "tumor_type"):
        if optional_key in ind:
            ind_obj[optional_key] = ind[optional_key]

    ind_obj["tissue_type"] = ind.get("tissue_type", "unknown")

    # SMA
    sma_keys = (
        "is_sma",
        "is_sma_carrier",
        "smn1_cn",
        "smn2_cn",
        "smn2delta78_cn",
        "smn_27134_cn",
    )
    for sma_key in sma_keys:
        ind_obj[sma_key] = ind.get(sma_key)

    return ind_obj
def parse_individual(sample):
    """Validate one raw sample and reshape it into an individual dict

    The sample must carry 'sample_id', 'sex' and 'phenotype'. The sex has
    to be a key of REV_SEX_MAP and the phenotype a key of
    REV_PHENOTYPE_MAP. The collected information is passed through
    removeNoneValues before it is returned.

    Args:
        sample (dict)

    Returns:
        {
            'individual_id': str,
            'display_name': str,   # 'sample_name', falls back to the id
            'sex': str,
            'phenotype': str,
            'father': str,
            'mother': str,
            'confirmed_parent': copied from sample,
            'confirmed_sex': copied from sample,
            'predicted_ancestry': copied from sample,
            'bam_file': str,       # read from 'bam_path'
            'mt_bam': str,
            'analysis_type': str,
            'vcf2cytosure': str,
            'capture_kits': list(str),
            'tumor_type': str,
            'tmb': str,
            'msi': str,
            'tumor_purity': str,
            'tissue_type': str,
        }

    Raises:
        PedigreeError: If a mandatory key is missing or if sex/phenotype
            holds a value that is not allowed
    """
    if 'sample_id' not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample['sample_id']

    # Validate the sex
    if 'sex' not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample['sex']
    if sex not in REV_SEX_MAP:
        allowed_sexes = ', '.join(REV_SEX_MAP)
        log.warning(
            "'sex' is only allowed to have values from {}".format(allowed_sexes))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Validate the phenotype
    if 'phenotype' not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample['phenotype']
    if phenotype not in REV_PHENOTYPE_MAP:
        allowed_phenotypes = ', '.join(REV_PHENOTYPE_MAP)
        log.warning(
            "'phenotype' is only allowed to have values from {}".format(
                allowed_phenotypes))
        raise PedigreeError("Individual %s has wrong formated phenotype" % sample_id)

    # A capture kit is wrapped in a list whenever the key is present
    if 'capture_kit' in sample:
        capture_kits = [sample['capture_kit']]
    else:
        capture_kits = []

    parsed = {
        'individual_id': sample_id,
        'display_name': sample.get('sample_name', sample_id),
        'sex': sex,
        'phenotype': phenotype,
        'father': sample.get('father'),
        'mother': sample.get('mother'),
        'confirmed_parent': sample.get('confirmed_parent'),
        'confirmed_sex': sample.get('confirmed_sex'),
        'predicted_ancestry': sample.get('predicted_ancestry'),
        'bam_file': sample.get('bam_path'),
        'mt_bam': sample.get('mt_bam'),
        'analysis_type': sample.get('analysis_type'),
        # Path to downloadable vcf2cytosure file
        'vcf2cytosure': sample.get('vcf2cytosure'),
        'capture_kits': capture_kits,
        # Cancer specific values
        'tumor_type': sample.get('tumor_type'),
        # tumor_mutational_burden
        'tmb': sample.get('tmb'),
        'msi': sample.get('msi'),
        'tumor_purity': sample.get('tumor_purity'),
        'tissue_type': sample.get('tissue_type'),
    }

    # NOTE(review): removeNoneValues is defined elsewhere; presumably it
    # drops the entries whose value is None - confirm.
    return removeNoneValues(parsed)
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter): Used to look up the owner
            institute and the gene panels, and handed to build_phenotype

    Returns:
        case_obj (dict): A case object

        dict(
            _id = str, # same value as case_id
            case_id = str, # required
            display_name = str, # If not display name use case_id
            owner = str, # required, has to exist as institute
            # These are the names of all the collaborators that are allowed
            # to view the case, including the owner
            collaborators = list, # List of institute_ids
            assignees = list, # [assignee], only set if an assignee is given
            individuals = list, # list of dictionaries with individuals,
                                # sorted on descending phenotype
            created_at = datetime,
            updated_at = datetime,
            suspects = list, # only set if given
            causatives = list, # only set if given
            synopsis = str, # default=''
            status = str, # always 'inactive'
            is_research = bool, # always False
            research_requested = bool, # always False
            rerun_requested = bool, # always False
            analysis_date = datetime, # only set if given
            analysis_dates = list, # [analysis_date], only set if given
            panels = list, # list of dictionaries with panel information
            dynamic_gene_list = dict, # initialised empty
            genome_build = str, # default='37'
            genome_version = copied from case_data, None if missing
            rank_model_version = float, # only set if given
            rank_score_threshold = float, # only set if given
            phenotype_terms = list, # only set if any term could be built
            phenotype_groups = list, # only set if any group could be built
            madeline_info = str,
            vcf_files = dict, # A dictionary with vcf files
            has_svvariants = bool, # True if a vcf_sv(_research) file is given
            is_migrated = bool # always False
        )

    Raises:
        PedigreeError: If case_data has no 'case_id'
        ConfigError: If case_data has no 'owner'
        IntegrityError: If the owner institute or one of the gene panels
            is not found via the adapter
    """
    try:
        case_id = case_data['case_id']
    except KeyError:
        raise PedigreeError("Case has to have a case id")

    log.info("build case with id: {0}".format(case_id))
    case_obj = {'_id': case_id}
    case_obj['case_id'] = case_id
    case_obj['display_name'] = case_data.get('display_name', case_id)

    # Check if institute exists in database
    try:
        institute_id = case_data['owner']
    except KeyError:
        raise ConfigError("Case has to have a institute")
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" % institute_id)
    case_obj['owner'] = institute_id

    # Owner allways has to be part of collaborators
    collaborators = set(case_data.get('collaborators', []))
    collaborators.add(institute_id)
    case_obj['collaborators'] = list(collaborators)

    if case_data.get('assignee'):
        case_obj['assignees'] = [case_data['assignee']]

    # Individuals
    ind_objs = [
        build_individual(individual)
        for individual in case_data.get('individuals', [])
    ]
    # sort the samples to put the affected individual first
    case_obj['individuals'] = sorted(ind_objs, key=lambda ind: -ind['phenotype'])

    now = datetime.now()
    case_obj['created_at'] = now
    case_obj['updated_at'] = now

    if case_data.get('suspects'):
        case_obj['suspects'] = case_data['suspects']
    if case_data.get('causatives'):
        case_obj['causatives'] = case_data['causatives']

    case_obj['synopsis'] = case_data.get('synopsis', '')

    case_obj['status'] = 'inactive'
    case_obj['is_research'] = False
    case_obj['research_requested'] = False
    case_obj['rerun_requested'] = False

    analysis_date = case_data.get('analysis_date')
    if analysis_date:
        case_obj['analysis_date'] = analysis_date
        case_obj['analysis_dates'] = [analysis_date]

    # We store some metadata and references about gene panels in 'panels'
    default_panels = case_data.get('default_panels', [])
    panels = []
    for panel_name in case_data.get('gene_panels', []):
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" % panel_name)
        panels.append({
            'panel_id': panel_obj['_id'],
            'panel_name': panel_obj['panel_name'],
            'display_name': panel_obj['display_name'],
            'version': panel_obj['version'],
            'updated_at': panel_obj['date'],
            'nr_genes': len(panel_obj['genes']),
            'is_default': panel_name in default_panels,
        })
    case_obj['panels'] = panels

    case_obj['dynamic_gene_list'] = {}

    # Meta data
    # TODO raise exception if invalid genome build was used (not '37'/'38')
    case_obj['genome_build'] = case_data.get('genome_build', '37')
    case_obj['genome_version'] = case_data.get('genome_version')

    if case_data.get('rank_model_version'):
        case_obj['rank_model_version'] = float(case_data['rank_model_version'])

    if case_data.get('rank_score_threshold'):
        case_obj['rank_score_threshold'] = float(case_data['rank_score_threshold'])

    # phenotype information and phenotype groups are built the same way;
    # a key is only stored when at least one phenotype object was built
    for phenotype_key in ('phenotype_terms', 'phenotype_groups'):
        phenotype_objs = []
        for phenotype in case_data.get(phenotype_key, []):
            phenotype_obj = build_phenotype(phenotype, adapter)
            if phenotype_obj:
                # append has to be called; subscripting the bound method
                # (append[...]) raises a TypeError
                phenotype_objs.append(phenotype_obj)
        if phenotype_objs:
            case_obj[phenotype_key] = phenotype_objs

    # Files
    case_obj['madeline_info'] = case_data.get('madeline_info')
    vcf_files = case_data.get('vcf_files', {})
    case_obj['vcf_files'] = vcf_files

    case_obj['has_svvariants'] = bool(
        vcf_files.get('vcf_sv') or vcf_files.get('vcf_sv_research'))

    case_obj['is_migrated'] = False

    return case_obj