Ejemplo n.º 1
0
def parse_individuals(samples):
    """Reformat raw sample information into validated individuals.

    Every sample is passed through ``parse_individual``. Afterwards each
    father/mother reference that is set (truthy and different from ``'0'``)
    must be the ``individual_id`` of one of the parsed individuals.

    Args:
        samples(list(dict)): raw sample information

    Returns:
        individuals(list(dict)): the parsed individuals, in input order

    Raises:
        PedigreeError: if ``samples`` is empty or if a referenced parent is
            not part of the family
    """
    if len(samples) == 0:
        raise PedigreeError("No samples could be found")

    individuals = []
    known_ids = set()
    for sample in samples:
        individual = parse_individual(sample)
        individuals.append(individual)
        known_ids.add(individual['individual_id'])

    def is_unknown_parent(parent_id):
        """Tell if a parent is referenced but not part of the family."""
        if not parent_id or parent_id == '0':
            # No parent configured for this individual
            return False
        return parent_id not in known_ids

    # Every configured parent has to be one of the parsed individuals
    for individual in individuals:
        father = individual['father']
        if is_unknown_parent(father):
            raise PedigreeError('father %s does not exist in family' %
                                father)
        mother = individual['mother']
        if is_unknown_parent(mother):
            raise PedigreeError('mother %s does not exist in family' %
                                mother)

    return individuals
Ejemplo n.º 2
0
def parse_individual(sample):
    """Validate one sample and reformat it to an individual.

    Args:
        sample (dict): raw sample information; the keys 'sample_id',
            'sex' and 'phenotype' are mandatory

    Returns:
        {
            'individual_id': str,
            'father': str,
            'mother': str,
            'display_name': str,
            'sex': str,
            'phenotype': str,
            'bam_file': str,
            'analysis_type': str,
            'capture_kits': list(str),
        }

    Raises:
        PedigreeError: if a mandatory key is missing, if the sex is not a
            key of REV_SEX_MAP or if the phenotype is not a key of
            REV_PHENOTYPE_MAP
    """
    if 'sample_id' not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample['sample_id']

    # Sex is mandatory and restricted to the keys of REV_SEX_MAP
    if 'sex' not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample['sex']
    if sex not in REV_SEX_MAP:
        allowed_sexes = ', '.join(list(REV_SEX_MAP.keys()))
        logger.warning(
            "'sex' is only allowed to have values from {}".format(
                allowed_sexes))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Phenotype is mandatory and restricted to the keys of REV_PHENOTYPE_MAP
    if 'phenotype' not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample['phenotype']
    if phenotype not in REV_PHENOTYPE_MAP:
        allowed_phenotypes = ', '.join(list(REV_PHENOTYPE_MAP.keys()))
        logger.warning(
            "'phenotype' is only allowed to have values from {}".format(
                allowed_phenotypes))
        raise PedigreeError("Individual %s has wrong formated phenotype" %
                            sample_id)

    # A single capture kit is wrapped in a list, no kit gives an empty list
    if 'capture_kit' in sample:
        capture_kits = [sample.get('capture_kit')]
    else:
        capture_kits = []

    return {
        'individual_id': sample_id,
        # Fall back to the sample id when no explicit name is given
        'display_name': sample.get('sample_name', sample_id),
        'sex': sex,
        'phenotype': phenotype,
        'father': sample.get('father'),
        'mother': sample.get('mother'),
        'bam_file': sample.get('bam_path'),
        'analysis_type': sample.get('analysis_type'),
        'capture_kits': capture_kits,
    }
Ejemplo n.º 3
0
def parse_ped(ped_stream, family_type="ped"):
    """Extract the family id and minimal sample information from a PED file.

    Args:
        ped_stream(iterable(str)): the pedigree lines
        family_type(str): Format of the pedigree information

    Returns:
        family_id(str), samples(list[dict]): each sample holds the keys
            "sample_id", "father", "mother", "sex" and "phenotype"

    Raises:
        PedigreeError: if the stream does not describe exactly one family
    """
    pedigree = FamilyParser(ped_stream, family_type=family_type)
    families = pedigree.families

    if len(families) != 1:
        raise PedigreeError("Only one case per ped file is allowed")

    # Exactly one family exists at this point, pick it out
    family_id = list(families.keys())[0]
    family = families[family_id]

    samples = []
    for ind_id, individual in family.individuals.items():
        sample = {
            "sample_id": ind_id,
            "father": individual.father,
            "mother": individual.mother,
            # Translate the pedigree codes through the lookup tables
            "sex": SEX_MAP[individual.sex],
            "phenotype": PHENOTYPE_MAP[int(individual.phenotype)],
        }
        samples.append(sample)

    return family_id, samples
Ejemplo n.º 4
0
 def family_relations_consistent(cls, individuals):
     """Verify that every configured parent is a member of the family.

     Each individual is converted with its ``dict()`` method. A father or
     mother counts as configured when the value is truthy and not "0".

     Args:
         individuals: iterable of objects exposing ``dict()``

     Returns:
         The ``individuals`` argument, unchanged.

     Raises:
         PedigreeError: if there are no individuals or if a configured
             parent is not the ``individual_id`` of any individual
     """
     records = []
     for individual in individuals:
         records.append(individual.dict())
     if not records:
         raise PedigreeError("No samples could be found")

     family_ids = [record["individual_id"] for record in records]

     # Every configured parent has to be present among the individuals
     for record in records:
         father = record.get("father")
         if father and father != "0" and father not in family_ids:
             raise PedigreeError("father %s does not exist in family" %
                                 father)
         mother = record.get("mother")
         if mother and mother != "0" and mother not in family_ids:
             raise PedigreeError("mother %s does not exist in family" %
                                 mother)
     return individuals
Ejemplo n.º 5
0
def parse_ped(ped_stream, family_type='ped'):
    """Parse out minimal family information from a PED file.

    Args:
        ped_stream(iterable(str)): the pedigree lines
        family_type(str): Format of the pedigree information

    Returns:
        family_id(str), samples(list[dict]): each sample holds the keys
            'sample_id', 'father', 'mother', 'sex' and 'phenotype'

    Raises:
        PedigreeError: if the stream does not describe exactly one family
    """
    pedigree = FamilyParser(ped_stream, family_type=family_type)

    if len(pedigree.families) != 1:
        raise PedigreeError("Only one case per ped file is allowed")

    family_id = list(pedigree.families.keys())[0]
    members = pedigree.families[family_id].individuals

    def as_sample(ind_id, individual):
        """Describe one pedigree member as a sample dictionary."""
        return {
            'sample_id': ind_id,
            'father': individual.father,
            'mother': individual.mother,
            # Translate the pedigree codes through the lookup tables
            'sex': SEX_MAP[individual.sex],
            'phenotype': PHENOTYPE_MAP[int(individual.phenotype)],
        }

    samples = [
        as_sample(ind_id, individual)
        for ind_id, individual in members.items()
    ]

    return family_id, samples
Ejemplo n.º 6
0
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str, # REV_SEX_MAP value converted to str
            phenotype = int, # REV_PHENOTYPE_MAP value, -9 stored as 0
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file
            confirmed_sex = copied from ind, None if missing
            confirmed_parent = copied from ind, None if missing
            predicted_ancestry = copied from ind, None if missing
            analysis_type = str, # choices=ANALYSIS_TYPES
        )

    Raises:
        PedigreeError: if individual_id is missing, if sex or phenotype
            is unknown, or if analysis_type is not in ANALYSIS_TYPES
    """
    try:
        ind_obj = dict(individual_id=ind['individual_id'])
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")
    log.info("Building Individual with id:{0}".format(
        ind_obj['individual_id']))

    # Use individual_id if display_name does not exist
    ind_obj['display_name'] = ind.get('display_name', ind_obj['individual_id'])

    sex = ind.get('sex', 'unknown')
    # Convert sex to .ped
    try:
        ind_obj['sex'] = str(REV_SEX_MAP[sex])
    except KeyError:
        raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get('phenotype', 'unknown')
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    ind_obj['phenotype'] = 0 if ped_phenotype == -9 else ped_phenotype

    ind_obj['father'] = ind.get('father')
    ind_obj['mother'] = ind.get('mother')
    ind_obj['capture_kits'] = ind.get('capture_kits', [])
    ind_obj['bam_file'] = ind.get('bam_file')
    ind_obj['confirmed_sex'] = ind.get('confirmed_sex')
    ind_obj['confirmed_parent'] = ind.get('confirmed_parent')
    ind_obj['predicted_ancestry'] = ind.get('predicted_ancestry')

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get('analysis_type', 'unknown')
    if analysis_type not in ANALYSIS_TYPES:
        # Format the message here: passing the value as a second argument
        # to the exception would leave the "%s" placeholder unfilled
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj['analysis_type'] = analysis_type

    return ind_obj
Ejemplo n.º 7
0
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str,
            phenotype = int,
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file,
            mt_bam = str, # Path taken from the 'mt_bam' key
            rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
            rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
            tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
            upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
            upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
            vcf2cytosure = str, # Path to CGH file
            analysis_type = str, # choices=ANALYSIS_TYPES
            tissue_type = str, # defaults to 'unknown'
            # tmb, msi, tumor_purity and tumor_type are copied only when
            # the key is present in ind
        )

    Raises:
        PedigreeError: if sex is unknown,
        if phenotype is unknown,
        if analysis_type is unknown,
        or missing individual_id
    """
    try:
        ind_obj = dict(individual_id=ind['individual_id'])
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")
    log.info("Building Individual with id:{0}".format(
        ind_obj['individual_id']))

    # Use individual_id if display_name does not exist
    ind_obj['display_name'] = ind.get('display_name', ind_obj['individual_id'])

    sex = ind.get('sex', 'unknown')
    # Convert sex to .ped
    try:
        # Check if sex is coded as an integer
        int(sex)
        ind_obj['sex'] = str(sex)
    except (ValueError, TypeError):
        # int() raises TypeError (not ValueError) for None and other
        # non-numeric types; treat those like any other non-integer sex
        try:
            # Sex are numbers in the database
            ind_obj['sex'] = REV_SEX_MAP[sex]
        except KeyError:
            raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get('phenotype', 'unknown')
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    ind_obj['phenotype'] = 0 if ped_phenotype == -9 else ped_phenotype

    # Fix absolute path for individual bam files (takes care of incomplete path for demo files)
    ind_files = [
        'bam_file', 'mt_bam', 'vcf2cytosure', 'rhocall_bed', 'rhocall_wig',
        'tiddit_coverage_wig', 'upd_regions_bed', 'upd_sites_bed'
    ]

    for ind_file in ind_files:
        file_path = ind.get(ind_file)
        if file_path and os.path.exists(file_path):
            ind_obj[ind_file] = os.path.abspath(file_path)
        else:
            # Unset paths and paths that do not exist on disk become None
            ind_obj[ind_file] = None

    ind_obj['father'] = ind.get('father')
    ind_obj['mother'] = ind.get('mother')
    ind_obj['capture_kits'] = ind.get('capture_kits', [])
    ind_obj['confirmed_sex'] = ind.get('confirmed_sex')
    ind_obj['confirmed_parent'] = ind.get('confirmed_parent')
    ind_obj['predicted_ancestry'] = ind.get('predicted_ancestry')

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get('analysis_type', 'unknown')
    if analysis_type not in ANALYSIS_TYPES:
        # Format the message here: passing the value as a second argument
        # to the exception would leave the "%s" placeholder unfilled
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj['analysis_type'] = analysis_type

    # Cancer specific values are only stored when they were provided
    for optional_key in ('tmb', 'msi', 'tumor_purity', 'tumor_type'):
        if optional_key in ind:
            ind_obj[optional_key] = ind[optional_key]

    ind_obj['tissue_type'] = ind.get('tissue_type', 'unknown')

    return ind_obj
Ejemplo n.º 8
0
def parse_individual(sample):
    """Validate one sample and reformat it to an individual.

    Args:
        sample (dict): raw sample information; the keys "sample_id",
            "sex" and "phenotype" are mandatory

    Returns:
        The result of ``removeNoneValues`` applied to a dict with
        these keys:
            {
                'individual_id': str,
                'father': str,
                'mother': str,
                'display_name': str,
                'sex': str,
                'phenotype': str,
                'bam_file': str,
                'mt_bam': str,
                'analysis_type': str,
                'vcf2cytosure': str,
                'capture_kits': list(str),

                'upd_sites_bed': str,
                'upd_regions_bed': str,
                'rhocall_bed': str,
                'rhocall_wig': str,
                'tiddit_coverage_wig': str,

                'confirmed_parent': copied from sample,
                'confirmed_sex': copied from sample,
                'predicted_ancestry': str,

                'is_sma': boolean,
                'is_sma_carrier': boolean,
                'smn1_cn': int,
                'smn2_cn': int,
                'smn2delta78_cn': int,
                'smn_27134_cn': int,

                'tumor_type': str,
                'tmb': str,
                'msi': str,
                'tumor_purity': float,
                'tissue_type': str,
            }

    Raises:
        PedigreeError: if a mandatory key is missing, if the sex is not a
            key of REV_SEX_MAP or if the phenotype is not a key of
            REV_PHENOTYPE_MAP
    """
    if "sample_id" not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample["sample_id"]

    # Sex is mandatory and restricted to the keys of REV_SEX_MAP
    if "sex" not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample["sex"]
    if sex not in REV_SEX_MAP:
        allowed_sexes = ", ".join(list(REV_SEX_MAP.keys()))
        LOG.warning(
            "'sex' is only allowed to have values from {}".format(
                allowed_sexes))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Phenotype is mandatory and restricted to the keys of REV_PHENOTYPE_MAP
    if "phenotype" not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample["phenotype"]
    if phenotype not in REV_PHENOTYPE_MAP:
        allowed_phenotypes = ", ".join(list(REV_PHENOTYPE_MAP.keys()))
        LOG.warning(
            "'phenotype' is only allowed to have values from {}".format(
                allowed_phenotypes))
        raise PedigreeError("Individual %s has wrong formated phenotype" %
                            sample_id)

    ind_info = {
        "individual_id": sample_id,
        # Fall back to the sample id when no explicit name is given
        "display_name": sample.get("sample_name", sample_id),
        "sex": sex,
        "phenotype": phenotype,
    }

    for key in ("father", "mother", "confirmed_parent", "confirmed_sex",
                "predicted_ancestry"):
        ind_info[key] = sample.get(key)

    # IGV files these can be bam or cram format; the first option holding
    # a non-blank value is used
    for option in ("bam_path", "bam_file", "alignment_path"):
        alignment = sample.get(option)
        if alignment and alignment.strip() != "":
            ind_info["bam_file"] = alignment
            break

    # vcf2cytosure is the path to a downloadable vcf2cytosure file
    for key in ("rhocall_bed", "rhocall_wig", "tiddit_coverage_wig",
                "upd_regions_bed", "upd_sites_bed", "mt_bam",
                "analysis_type", "vcf2cytosure"):
        ind_info[key] = sample.get(key)

    # SMA values are copied as given
    for key in ("is_sma", "is_sma_carrier", "smn1_cn", "smn2_cn",
                "smn2delta78_cn", "smn_27134_cn"):
        ind_info[key] = sample.get(key, None)

    # A single capture kit is wrapped in a list, no kit gives an empty list
    if "capture_kit" in sample:
        ind_info["capture_kits"] = [sample.get("capture_kit")]
    else:
        ind_info["capture_kits"] = []

    # Cancer specific values (tmb: tumor mutational burden)
    for key in ("tumor_type", "tmb", "msi"):
        ind_info[key] = sample.get(key)

    tumor_purity = sample.get("tumor_purity")
    # might be a string-formatted fraction, example: 30/90
    if isinstance(tumor_purity, str):
        tumor_purity = float(Fraction(tumor_purity))
    ind_info["tumor_purity"] = tumor_purity

    ind_info["tissue_type"] = sample.get("tissue_type")

    # Let removeNoneValues clean up the result before returning it
    return removeNoneValues(ind_info)
Ejemplo n.º 9
0
def build_individual(ind):
    """Build a Individual object

    Args:
        ind (dict): A dictionary with individual information

    Returns:
        ind_obj (dict): A Individual object

        dict(
            individual_id = str, # required
            display_name = str, # defaults to individual_id
            sex = str,
            phenotype = int,
            father = str, # Individual id of father
            mother = str, # Individual id of mother
            capture_kits = list, # List of names of capture kits
            bam_file = str, # Path to bam file,
            mt_bam = str, # Path taken from the "mt_bam" key
            rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
            rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
            tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
            upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
            upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
            vcf2cytosure = str, # Path to CGH file
            is_sma = boolean,
            is_sma_carrier = boolean,
            smn1_cn = int,
            smn2_cn = int,
            smn2delta78_cn = int,
            smn_27134_cn = int,
            predicted_ancestry = str,
            analysis_type = str, # choices=ANALYSIS_TYPES
            tissue_type = str, # defaults to "unknown"
            # tmb, msi, tumor_purity and tumor_type are copied only when
            # the key is present in ind
        )

    Raises:
        PedigreeError: if sex is unknown,
        if phenotype is unknown,
        if analysis_type is unknown,
        or missing individual_id
    """
    try:
        ind_obj = dict(individual_id=ind["individual_id"])
    except KeyError:
        raise PedigreeError("Individual is missing individual_id")
    log.info("Building Individual with id:{0}".format(ind_obj["individual_id"]))

    # Use individual_id if display_name does not exist
    ind_obj["display_name"] = ind.get("display_name", ind_obj["individual_id"])

    sex = ind.get("sex", "unknown")
    # Convert sex to .ped
    try:
        # Check if sex is coded as an integer
        int(sex)
        ind_obj["sex"] = str(sex)
    except (ValueError, TypeError):
        # int() raises TypeError (not ValueError) for None and other
        # non-numeric types; treat those like any other non-integer sex
        try:
            # Sex are numbers in the database
            ind_obj["sex"] = REV_SEX_MAP[sex]
        except KeyError:
            raise PedigreeError("Unknown sex: %s" % sex)

    phenotype = ind.get("phenotype", "unknown")
    # Make the phenotype integers
    try:
        ped_phenotype = REV_PHENOTYPE_MAP[phenotype]
    except KeyError:
        raise PedigreeError("Unknown phenotype: %s" % phenotype)
    # The code -9 is stored as 0
    ind_obj["phenotype"] = 0 if ped_phenotype == -9 else ped_phenotype

    # Fix absolute path for individual bam files (takes care of incomplete path for demo files)
    ind_files = [
        "bam_file",
        "mt_bam",
        "vcf2cytosure",
        "rhocall_bed",
        "rhocall_wig",
        "tiddit_coverage_wig",
        "upd_regions_bed",
        "upd_sites_bed",
    ]

    for ind_file in ind_files:
        file_path = ind.get(ind_file)
        if file_path and os.path.exists(file_path):
            ind_obj[ind_file] = os.path.abspath(file_path)
        else:
            # Unset paths and paths that do not exist on disk become None
            ind_obj[ind_file] = None

    ind_obj["father"] = ind.get("father")
    ind_obj["mother"] = ind.get("mother")
    ind_obj["capture_kits"] = ind.get("capture_kits", [])
    ind_obj["confirmed_sex"] = ind.get("confirmed_sex")
    ind_obj["confirmed_parent"] = ind.get("confirmed_parent")
    ind_obj["predicted_ancestry"] = ind.get("predicted_ancestry")

    # Check if the analysis type is ok
    # Can be anyone of ('wgs', 'wes', 'mixed', 'unknown')
    analysis_type = ind.get("analysis_type", "unknown")
    if analysis_type not in ANALYSIS_TYPES:
        # Format the message here: passing the value as a second argument
        # to the exception would leave the "%s" placeholder unfilled
        raise PedigreeError("Analysis type %s not allowed" % analysis_type)
    ind_obj["analysis_type"] = analysis_type

    # Cancer specific values are only stored when they were provided
    for optional_key in ("tmb", "msi", "tumor_purity", "tumor_type"):
        if optional_key in ind:
            ind_obj[optional_key] = ind[optional_key]

    ind_obj["tissue_type"] = ind.get("tissue_type", "unknown")

    # SMA
    for sma_key in ("is_sma", "is_sma_carrier", "smn1_cn", "smn2_cn",
                    "smn2delta78_cn", "smn_27134_cn"):
        ind_obj[sma_key] = ind.get(sma_key, None)

    return ind_obj
Ejemplo n.º 10
0
def parse_individual(sample):
    """Validate one sample and reformat it to an individual.

    Args:
        sample (dict): raw sample information; the keys 'sample_id',
            'sex' and 'phenotype' are mandatory

    Returns:
        The result of ``removeNoneValues`` applied to a dict with
        these keys:
            {
                'individual_id': str,
                'father': str,
                'mother': str,
                'display_name': str,
                'sex': str,
                'phenotype': str,
                'confirmed_parent': copied from sample,
                'confirmed_sex': copied from sample,
                'predicted_ancestry': copied from sample,
                'bam_file': str,
                'mt_bam': str,
                'analysis_type': str,
                'vcf2cytosure': str,
                'capture_kits': list(str),

                'tumor_type': str,
                'tmb': str,
                'msi': str,
                'tumor_purity': str,
                'tissue_type': str,
            }

    Raises:
        PedigreeError: if a mandatory key is missing, if the sex is not a
            key of REV_SEX_MAP or if the phenotype is not a key of
            REV_PHENOTYPE_MAP
    """
    if 'sample_id' not in sample:
        raise PedigreeError("One sample is missing 'sample_id'")
    sample_id = sample['sample_id']

    # Sex is mandatory and restricted to the keys of REV_SEX_MAP
    if 'sex' not in sample:
        raise PedigreeError("Sample %s is missing 'sex'" % sample_id)
    sex = sample['sex']
    if sex not in REV_SEX_MAP:
        allowed_sexes = ', '.join(list(REV_SEX_MAP.keys()))
        log.warning(
            "'sex' is only allowed to have values from {}".format(
                allowed_sexes))
        raise PedigreeError("Individual %s has wrong formated sex" % sample_id)

    # Phenotype is mandatory and restricted to the keys of REV_PHENOTYPE_MAP
    if 'phenotype' not in sample:
        raise PedigreeError("Sample %s is missing 'phenotype'" % sample_id)
    phenotype = sample['phenotype']
    if phenotype not in REV_PHENOTYPE_MAP:
        allowed_phenotypes = ', '.join(list(REV_PHENOTYPE_MAP.keys()))
        log.warning(
            "'phenotype' is only allowed to have values from {}".format(
                allowed_phenotypes))
        raise PedigreeError("Individual %s has wrong formated phenotype" %
                            sample_id)

    # A single capture kit is wrapped in a list, no kit gives an empty list
    if 'capture_kit' in sample:
        capture_kits = [sample.get('capture_kit')]
    else:
        capture_kits = []

    ind_info = {
        'individual_id': sample_id,
        # Fall back to the sample id when no explicit name is given
        'display_name': sample.get('sample_name', sample_id),
        'sex': sex,
        'phenotype': phenotype,
        'father': sample.get('father'),
        'mother': sample.get('mother'),
        'confirmed_parent': sample.get('confirmed_parent'),
        'confirmed_sex': sample.get('confirmed_sex'),
        'predicted_ancestry': sample.get('predicted_ancestry'),
        # The alignment file is configured under the key 'bam_path'
        'bam_file': sample.get('bam_path'),
        'mt_bam': sample.get('mt_bam'),
        'analysis_type': sample.get('analysis_type'),
        # Path to downloadable vcf2cytosure file
        'vcf2cytosure': sample.get('vcf2cytosure'),
        'capture_kits': capture_kits,
        # Cancer specific values (tmb: tumor mutational burden)
        'tumor_type': sample.get('tumor_type'),
        'tmb': sample.get('tmb'),
        'msi': sample.get('msi'),
        'tumor_purity': sample.get('tumor_purity'),
        'tissue_type': sample.get('tissue_type'),
    }

    # Let removeNoneValues clean up the result before returning it
    return removeNoneValues(ind_info)
Ejemplo n.º 11
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        PedigreeError: if the case id is missing
        ConfigError: if the owner (institute) is missing
        IntegrityError: if the institute or one of the gene panels can
            not be found through the adapter

    dict(
        _id = str, # same value as case_id
        case_id = str, # required=True, unique
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # Holds the 'assignee' of case_data, only set if given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by their _id
        causatives = list, # List of variants referred by their _id

        synopsis = str, # The synopsis is a text blob
        status = str, # always 'inactive' here
        is_research = bool, # always False here
        research_requested = bool, # always False here
        rerun_requested = bool, # always False here

        analysis_date = datetime,
        analysis_dates = list, # list of datetimes

        # 'is_default' on a panel specifies if the panel was listed in
        # the 'default_panels' of case_data
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = dict, # initialised empty

        genome_build = str, # This should be 37 or 38 (not enforced yet)
        genome_version = float, # What version of the build

        rank_model_version = float,
        rank_score_threshold = float,

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        vcf_files = dict, # A dictionary with vcf files

        has_svvariants = bool, # True if vcf_sv or vcf_sv_research is set

        is_migrated = bool # always False here

    )
    """
    try:
        case_id = case_data['case_id']
    except KeyError:
        raise PedigreeError("Case has to have a case id")
    log.info("build case with id: {0}".format(case_id))
    case_obj = {'_id': case_id, 'case_id': case_id}

    case_obj['display_name'] = case_data.get('display_name', case_obj['case_id'])

    # Check if institute exists in database
    try:
        institute_id = case_data['owner']
    except KeyError:
        raise ConfigError("Case has to have a institute")
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" % institute_id)
    case_obj['owner'] = case_data['owner']

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get('collaborators', []))
    collaborators.add(case_data['owner'])
    case_obj['collaborators'] = list(collaborators)

    if case_data.get('assignee'):
        case_obj['assignees'] = [case_data['assignee']]

    # Individuals; errors from build_individual propagate to the caller
    ind_objs = [
        build_individual(individual)
        for individual in case_data.get('individuals', [])
    ]
    # Sort on descending phenotype value (intended to put the affected
    # individual first)
    sorted_inds = sorted(ind_objs, key=lambda ind: -ind['phenotype'])
    case_obj['individuals'] = sorted_inds

    now = datetime.now()
    case_obj['created_at'] = now
    case_obj['updated_at'] = now

    if case_data.get('suspects'):
        case_obj['suspects'] = case_data['suspects']
    if case_data.get('causatives'):
        case_obj['causatives'] = case_data['causatives']

    case_obj['synopsis'] = case_data.get('synopsis', '')

    case_obj['status'] = 'inactive'
    case_obj['is_research'] = False
    case_obj['research_requested'] = False
    case_obj['rerun_requested'] = False

    analysis_date = case_data.get('analysis_date')
    if analysis_date:
        case_obj['analysis_date'] = analysis_date
        case_obj['analysis_dates'] = [analysis_date]

    # We store some metadata and references about gene panels in 'panels'
    case_panels = case_data.get('gene_panels', [])
    default_panels = case_data.get('default_panels', [])
    panels = []

    for panel_name in case_panels:
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" % panel_name)
        panels.append({
            'panel_id': panel_obj['_id'],
            'panel_name': panel_obj['panel_name'],
            'display_name': panel_obj['display_name'],
            'version': panel_obj['version'],
            'updated_at': panel_obj['date'],
            'nr_genes': len(panel_obj['genes']),
            'is_default': panel_name in default_panels,
        })

    case_obj['panels'] = panels

    case_obj['dynamic_gene_list'] = {}

    # Meta data
    genome_build = case_data.get('genome_build', '37')
    if genome_build not in ['37', '38']:
        pass
        ##TODO raise exception if invalid genome build was used

    case_obj['genome_build'] = genome_build
    case_obj['genome_version'] = case_data.get('genome_version')

    if case_data.get('rank_model_version'):
        case_obj['rank_model_version'] = float(case_data['rank_model_version'])

    if case_data.get('rank_score_threshold'):
        case_obj['rank_score_threshold'] = float(case_data['rank_score_threshold'])

    # phenotype information
    phenotypes = []
    for phenotype in case_data.get('phenotype_terms', []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            # append has to be called; subscripting it raises TypeError
            phenotypes.append(phenotype_obj)
    if phenotypes:
        case_obj['phenotype_terms'] = phenotypes

    # phenotype groups
    phenotype_groups = []
    for phenotype in case_data.get('phenotype_groups', []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotype_groups.append(phenotype_obj)
    if phenotype_groups:
        case_obj['phenotype_groups'] = phenotype_groups

    # Files
    case_obj['madeline_info'] = case_data.get('madeline_info')
    case_obj['vcf_files'] = case_data.get('vcf_files', {})

    # SV variants exist if any of the SV vcf files is configured
    vcf_files = case_obj['vcf_files']
    case_obj['has_svvariants'] = bool(
        vcf_files.get('vcf_sv') or vcf_files.get('vcf_sv_research'))

    case_obj['is_migrated'] = False

    return case_obj