Ejemplo n.º 1
0
def parse_case(config, ped=None):
    """Parse case information from config or PED files.

    When ``ped`` is given, the family id and samples parsed from it are
    written back into ``config`` (replacing any existing ``'family'`` and
    ``'samples'`` entries) before the case data is assembled.

    Args:
        config (dict): case config with detailed information
        ped (str or stream): PED file with sample information, either as an
            already opened stream (anything with a ``read`` attribute) or
            as a path that will be opened for reading

    Returns:
        dict: parsed case data

    Raises:
        ConfigError: if 'owner', 'family' or 'vcf_snv' is missing
        KeyError: if 'samples' or 'analysis_date' is missing from config
        ValueError: if the configured madeline path does not exist
    """
    if 'owner' not in config:
        raise ConfigError("A case has to have a owner")
    owner = config['owner']

    if ped:
        if hasattr(ped, 'read'):
            # Already an open stream: hand it to the parser directly
            # instead of passing it to open(), which only accepts paths.
            family_id, samples = parse_ped(ped)
        else:
            with open(ped, 'r') as ped_handle:
                family_id, samples = parse_ped(ped_handle)
        # PED information takes precedence over what the config holds
        config['family'] = family_id
        config['samples'] = samples

    if 'family' not in config:
        raise ConfigError("A case has to have a 'family'")
    family_id = config['family']

    individuals = parse_individuals(config['samples'])

    if 'vcf_snv' not in config:
        raise ConfigError("A case has to have a snv vcf")

    case_data = {
        'owner': owner,
        'collaborators': [owner],
        # case id is owner and family id joined by a dash; the original
        # note mentions that this id is used across other apps
        'case_id': "{}-{}".format(owner, family_id),
        'display_name': family_id,
        'genome_build': config.get('human_genome_build'),
        'rank_model_version': config.get('rank_model_version'),
        'rank_score_threshold': config.get('rank_score_threshold', 5),
        'analysis_date': config['analysis_date'],
        'individuals': individuals,
        'vcf_files': {
            'vcf_snv': config.get('vcf_snv'),
            'vcf_sv': config.get('vcf_sv'),
            'vcf_snv_research': config.get('vcf_snv_research'),
            'vcf_sv_research': config.get('vcf_sv_research'),
        },
        'default_panels': config.get('default_gene_panels'),
        'gene_panels': config.get('gene_panels'),
        'assignee': config.get('assignee'),
    }

    # add the pedigree figure, this is a xml file which is dumped in the db
    if 'madeline' in config:
        mad_path = Path(config['madeline'])
        if not mad_path.exists():
            raise ValueError("madeline path not found: {}".format(mad_path))
        with mad_path.open('r') as in_handle:
            case_data['madeline_info'] = in_handle.read()

    return case_data
Ejemplo n.º 2
0
def parse_case(config):
    """Turn a case config into the dictionary used to build a case.

    Args:
        config (dict): case config with detailed information

    Returns:
        dict: parsed case data. 'track' is forced to 'cancer' when a cancer
            VCF (clinical or research) is configured.

    Raises:
        ConfigError: if 'owner' or 'family' is missing
        KeyError: if 'samples' or 'analysis_date' is missing
        ValueError: if the configured madeline path does not exist
    """
    if 'owner' not in config:
        raise ConfigError("A case has to have a owner")
    if 'family' not in config:
        raise ConfigError("A case has to have a 'family'")

    owner = config['owner']
    family = config['family']
    parsed_individuals = parse_individuals(config['samples'])

    # every VCF entry is optional and stored under its config key
    vcf_keys = (
        'vcf_snv',
        'vcf_sv',
        'vcf_str',
        'vcf_cancer',
        'vcf_snv_research',
        'vcf_sv_research',
        'vcf_cancer_research',
    )
    vcf_files = {vcf_key: config.get(vcf_key) for vcf_key in vcf_keys}

    case_data = dict(
        owner=owner,
        collaborators=[owner],
        case_id=family,
        display_name=config.get('family_name', family),
        genome_build=config.get('human_genome_build'),
        rank_model_version=config.get('rank_model_version'),
        rank_score_threshold=config.get('rank_score_threshold', 0),
        sv_rank_model_version=config.get('sv_rank_model_version'),
        analysis_date=config['analysis_date'],
        individuals=parsed_individuals,
        vcf_files=vcf_files,
        default_panels=config.get('default_gene_panels', []),
        gene_panels=config.get('gene_panels', []),
        assignee=config.get('assignee'),
        peddy_ped=config.get('peddy_ped'),
        peddy_sex=config.get('peddy_sex'),
        peddy_check=config.get('peddy_check'),
        delivery_report=config.get('delivery_report'),
        multiqc=config.get('multiqc'),
        track=config.get('track', 'rare'),
    )

    # the pedigree figure is an xml file whose content is stored as text
    if 'madeline' in config:
        mad_path = Path(config['madeline'])
        if mad_path.exists():
            with mad_path.open('r') as madeline_handle:
                case_data['madeline_info'] = madeline_handle.read()
        else:
            raise ValueError("madeline path not found: {}".format(mad_path))

    # any cancer VCF overrides the configured track
    if vcf_files['vcf_cancer'] or vcf_files['vcf_cancer_research']:
        case_data['track'] = 'cancer'

    return case_data
Ejemplo n.º 3
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    Optional collections in ``case_data`` (collaborators, individuals, gene
    panels, default panels, phenotype terms/groups and vcf files) may be
    missing or explicitly ``None``; both are treated as empty.

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        KeyError: If 'case_id' is missing from case_data
        ConfigError: If 'owner' is missing from case_data
        IntegrityError: If the owner institute or any of the gene panels
            is not returned by the adapter

    Layout of a case object:

    dict(
        case_id = str, # required=True, unique
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # holds the assignee (_id of a user), only set if given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by their _id
        causatives = list, # List of variants referred by their _id

        synopsis = str, # The synopsis is a text blob
        status = str, # default='inactive', choices=STATUS
        is_research = bool, # default=False
        research_requested = bool, # default=False
        rerun_requested = bool, # default=False

        analysis_date = datetime,
        analyses = list, # list of dict

        # default_panels specifies which panels that should be shown when
        # the case is opened
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = list, # List of genes

        genome_build = str, # This should be 37 or 38
        genome_version = float, # What version of the build

        rank_model_version = str,
        rank_score_threshold = float, # only set when a non-zero value is given

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        multiqc = str, # path to dir with multiqc information

        vcf_files = dict, # A dictionary with vcf files

        diagnosis_phenotypes = list, # List of references to diseases
        diagnosis_genes = list, # List of references to genes

        has_svvariants = bool, # default=False

        is_migrated = bool # default=False

    )
    """
    log.info("build case with id: {0}".format(case_data['case_id']))
    case_obj = {
        '_id': case_data['case_id'],
        'display_name': case_data.get('display_name', case_data['case_id']),
    }

    # Check if institute exists in database
    try:
        institute_id = case_data['owner']
    except KeyError as err:
        raise ConfigError("Case has to have a institute") from err
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" %
                             institute_id)
    case_obj['owner'] = institute_id

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get('collaborators') or [])
    collaborators.add(institute_id)
    case_obj['collaborators'] = list(collaborators)

    if case_data.get('assignee'):
        case_obj['assignees'] = [case_data['assignee']]

    # Individuals; errors from build_individual propagate unchanged
    ind_objs = [
        build_individual(individual)
        for individual in (case_data.get('individuals') or [])
    ]
    # sort the samples to put the affected individual first
    # (descending 'phenotype' value, ties keep their input order)
    case_obj['individuals'] = sorted(
        ind_objs, key=lambda ind: -ind['phenotype'])

    now = datetime.now()
    case_obj['created_at'] = now
    case_obj['updated_at'] = now

    if case_data.get('suspects'):
        case_obj['suspects'] = case_data['suspects']
    if case_data.get('causatives'):
        case_obj['causatives'] = case_data['causatives']

    case_obj['synopsis'] = case_data.get('synopsis', '')

    case_obj['status'] = 'inactive'
    case_obj['is_research'] = False
    case_obj['research_requested'] = False
    case_obj['rerun_requested'] = False

    analysis_date = case_data.get('analysis_date')
    if analysis_date:
        case_obj['analysis_date'] = analysis_date

    # We store some metadata and references about gene panels in 'panels'.
    # 'or []' also covers keys that are present but set to None.
    case_panels = case_data.get('gene_panels') or []
    default_panels = case_data.get('default_panels') or []
    panels = []

    for panel_name in case_panels:
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" %
                                 panel_name)
        panels.append({
            'panel_id': panel_obj['_id'],
            'panel_name': panel_obj['panel_name'],
            'display_name': panel_obj['display_name'],
            'version': panel_obj['version'],
            'updated_at': panel_obj['date'],
            'nr_genes': len(panel_obj['genes']),
            'is_default': panel_name in default_panels,
        })

    case_obj['panels'] = panels

    case_obj['dynamic_gene_list'] = []

    # Meta data
    # TODO raise exception if invalid genome build (not '37' or '38') was used
    case_obj['genome_build'] = case_data.get('genome_build', '37')
    case_obj['genome_version'] = case_data.get('genome_version')

    if case_data.get('rank_model_version'):
        case_obj['rank_model_version'] = str(case_data['rank_model_version'])

    if case_data.get('sv_rank_model_version'):
        case_obj['sv_rank_model_version'] = str(
            case_data['sv_rank_model_version'])

    if case_data.get('rank_score_threshold'):
        case_obj['rank_score_threshold'] = float(
            case_data['rank_score_threshold'])

    # phenotype information
    phenotypes = []
    for phenotype in (case_data.get('phenotype_terms') or []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotypes.append(phenotype_obj)
    if phenotypes:
        case_obj['phenotype_terms'] = phenotypes

    # phenotype groups
    phenotype_groups = []
    for phenotype in (case_data.get('phenotype_groups') or []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotype_groups.append(phenotype_obj)
    if phenotype_groups:
        case_obj['phenotype_groups'] = phenotype_groups

    # Files
    case_obj['madeline_info'] = case_data.get('madeline_info')
    case_obj['chromograph_image_files'] = case_data.get(
        'chromograph_image_files')
    case_obj['chromograph_prefixes'] = case_data.get('chromograph_prefixes')

    if 'multiqc' in case_data:
        case_obj['multiqc'] = case_data['multiqc']
    vcf_files = case_data.get('vcf_files') or {}
    case_obj['vcf_files'] = vcf_files
    case_obj['delivery_report'] = case_data.get('delivery_report')

    case_obj['has_svvariants'] = bool(
        vcf_files.get('vcf_sv') or vcf_files.get('vcf_sv_research'))

    case_obj['has_strvariants'] = bool(vcf_files.get('vcf_str'))

    case_obj['is_migrated'] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj['track'] = case_data.get('track', 'rare')

    return case_obj
Ejemplo n.º 4
0
 def mandatory_check_family(cls, value):
     """Return the `family` value of a case configuration unchanged.

     `family` is mandatory: a value of None raises ConfigError. Any other
     value, including empty or falsy ones, is passed through.
     """
     if value is not None:
         return value
     raise ConfigError("A case has to have a 'family'")
Ejemplo n.º 5
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    Optional collections in ``case_data`` (collaborators, individuals, gene
    panels, default panels and vcf files) may be missing or explicitly
    ``None``; both are treated as empty.

    Gene panels and HPO terms that the adapter does not return are skipped
    with a warning. When the case has cohorts that are not yet registered on
    the owner institute, the institute document is updated through
    ``adapter.institute_collection.find_one_and_update``.

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        KeyError: If 'case_id' is missing from case_data
        ConfigError: If 'owner' is missing from case_data
        IntegrityError: If the owner institute is not returned by the adapter

    Layout of a case object:

    dict(
        case_id = str, # required=True, unique
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # holds the assignee (_id of a user), only set if given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by their _id
        causatives = list, # List of variants referred by their _id

        synopsis = str, # The synopsis is a text blob
        status = str, # default='inactive', choices=STATUS
        is_research = bool, # default=False
        research_requested = bool, # default=False
        rerun_requested = bool, # default=False
        cohorts = list, # list of strings
        analysis_date = datetime,
        analyses = list, # list of dict

        # default_panels specifies which panels that should be shown when
        # the case is opened
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = list, # List of genes

        genome_build = str, # This should be 37 or 38

        rank_model_version = str,
        rank_score_threshold = float, # only set when a non-zero value is given

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        multiqc = str, # path to dir with multiqc information

        cnv_report = str, # path to file with cnv report
        coverage_qc_report = str, # path to file with coverage and qc report
        gene_fusion_report = str, # path to the gene fusions report
        gene_fusion_report_research = str, # path to the research gene fusions report

        vcf_files = dict, # A dictionary with vcf files

        diagnosis_phenotypes = list, # List of references to diseases
        diagnosis_genes = list, # List of references to genes

        has_svvariants = bool, # default=False

        is_migrated = bool # default=False

    )
    """
    LOG.info("build case with id: {0}".format(case_data["case_id"]))
    case_obj = {
        "_id": case_data["case_id"],
        "display_name": case_data.get("display_name", case_data["case_id"]),
    }

    # Check if institute exists in database
    try:
        institute_id = case_data["owner"]
    except KeyError as err:
        raise ConfigError("Case has to have a institute") from err
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" %
                             institute_id)
    case_obj["owner"] = institute_id

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get("collaborators") or [])
    collaborators.add(institute_id)
    case_obj["collaborators"] = list(collaborators)

    if case_data.get("assignee"):
        case_obj["assignees"] = [case_data["assignee"]]

    case_obj["smn_tsv"] = case_data.get("smn_tsv")

    # Individuals; errors from build_individual propagate unchanged
    ind_objs = [
        build_individual(individual)
        for individual in (case_data.get("individuals") or [])
    ]
    # sort the samples to put the affected individual first
    # (descending "phenotype" value, ties keep their input order)
    case_obj["individuals"] = sorted(
        ind_objs, key=lambda ind: -ind["phenotype"])

    now = datetime.now()
    case_obj["created_at"] = now
    case_obj["updated_at"] = now

    if case_data.get("suspects"):
        case_obj["suspects"] = case_data["suspects"]
    if case_data.get("causatives"):
        case_obj["causatives"] = case_data["causatives"]

    case_obj["synopsis"] = case_data.get("synopsis", "")

    case_obj["status"] = "inactive"
    case_obj["is_research"] = False
    case_obj["research_requested"] = False
    case_obj["rerun_requested"] = False

    case_obj["lims_id"] = case_data.get("lims_id", "")

    analysis_date = case_data.get("analysis_date")
    if analysis_date:
        case_obj["analysis_date"] = analysis_date

    # We store some metadata and references about gene panels in 'panels'.
    # 'or []' also covers keys that are present but set to None.
    case_panels = case_data.get("gene_panels") or []
    default_panels = case_data.get("default_panels") or []
    panels = []

    for panel_name in case_panels:
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            # unknown panels are dropped rather than failing the whole case
            LOG.warning(
                "Panel %s does not exist in database and will not be saved in case document.",
                panel_name)
            continue
        panels.append({
            "panel_id": panel_obj["_id"],
            "panel_name": panel_obj["panel_name"],
            "display_name": panel_obj["display_name"],
            "version": panel_obj["version"],
            "updated_at": panel_obj["date"],
            "nr_genes": len(panel_obj["genes"]),
            "is_default": panel_name in default_panels,
        })

    case_obj["panels"] = panels

    case_obj["dynamic_gene_list"] = []

    # Meta data
    # TODO raise exception if invalid genome build (not "37" or "38") was used
    case_obj["genome_build"] = case_data.get("genome_build", "37")

    if case_data.get("rank_model_version"):
        case_obj["rank_model_version"] = str(case_data["rank_model_version"])

    if case_data.get("sv_rank_model_version"):
        case_obj["sv_rank_model_version"] = str(
            case_data["sv_rank_model_version"])

    if case_data.get("rank_score_threshold"):
        case_obj["rank_score_threshold"] = float(
            case_data["rank_score_threshold"])

    # Cohort information
    if case_data.get("cohorts"):
        case_obj["cohorts"] = case_data["cohorts"]
        # Check if all case cohorts are registered under the institute
        institute_cohorts = set(institute_obj.get("cohorts") or [])
        all_cohorts = institute_cohorts.union(case_obj["cohorts"])
        if len(all_cohorts) > len(institute_cohorts):
            # if not, update institute with new cohorts
            LOG.warning("Updating institute object with new cohort terms")
            adapter.institute_collection.find_one_and_update(
                {"_id": institute_obj["_id"]},
                {"$set": {
                    "cohorts": list(all_cohorts)
                }})

    # phenotype information
    if case_data.get("phenotype_terms"):
        phenotypes = []
        for phenotype in case_data["phenotype_terms"]:
            phenotype_obj = adapter.hpo_term(phenotype)
            if phenotype_obj is None:
                LOG.warning(
                    "Could not find term with ID '%s' in HPO collection, skipping phenotype term.",
                    phenotype)
                continue

            phenotypes.append({
                "phenotype_id": phenotype,
                "feature": phenotype_obj.get("description")
            })
        if phenotypes:
            case_obj["phenotype_terms"] = phenotypes

    # phenotype groups
    if case_data.get("phenotype_groups"):
        phenotype_groups = []
        for phenotype in case_data["phenotype_groups"]:
            phenotype_obj = build_phenotype(phenotype, adapter)
            if phenotype_obj:
                phenotype_groups.append(phenotype_obj)
        if phenotype_groups:
            case_obj["phenotype_groups"] = phenotype_groups

    # Files
    case_obj["madeline_info"] = case_data.get("madeline_info")

    case_obj["custom_images"] = case_data.get("custom_images")
    # only reports that are present in case_data are copied over
    for custom_report in CUSTOM_CASE_REPORTS:
        if custom_report in case_data:
            case_obj[custom_report] = case_data[custom_report]

    vcf_files = case_data.get("vcf_files") or {}
    case_obj["vcf_files"] = vcf_files
    case_obj["delivery_report"] = case_data.get("delivery_report")

    case_obj["has_svvariants"] = bool(
        vcf_files.get("vcf_sv") or vcf_files.get("vcf_sv_research"))

    case_obj["has_strvariants"] = bool(vcf_files.get("vcf_str"))

    case_obj["is_migrated"] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj["track"] = case_data.get("track", "rare")

    case_obj["group"] = case_data.get("group", [])

    return case_obj
Ejemplo n.º 6
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    Optional collections in ``case_data`` (collaborators, individuals, gene
    panels, default panels, phenotype terms/groups and vcf files) may be
    missing or explicitly ``None``; both are treated as empty.

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        KeyError: If 'case_id' is missing from case_data
        ConfigError: If 'owner' is missing from case_data
        IntegrityError: If the owner institute or any of the gene panels
            is not returned by the adapter

    Layout of a case object:

    dict(
        case_id = str, # required=True, unique
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # holds the assignee (_id of a user), only set if given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by their _id
        causatives = list, # List of variants referred by their _id

        synopsis = str, # The synopsis is a text blob
        status = str, # default='inactive', choices=STATUS
        is_research = bool, # default=False
        research_requested = bool, # default=False
        rerun_requested = bool, # default=False

        analysis_date = datetime,
        analyses = list, # list of dict

        # default_panels specifies which panels that should be shown when
        # the case is opened
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = list, # List of genes

        genome_build = str, # This should be 37 or 38
        genome_version = float, # What version of the build

        rank_model_version = str,
        rank_score_threshold = float, # only set when a non-zero value is given

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        multiqc = str, # path to dir with multiqc information

        vcf_files = dict, # A dictionary with vcf files

        diagnosis_phenotypes = list, # List of references to diseases
        diagnosis_genes = list, # List of references to genes

        has_svvariants = bool, # default=False

        is_migrated = bool # default=False

    )
    """
    log.info("build case with id: {0}".format(case_data["case_id"]))
    case_obj = {
        "_id": case_data["case_id"],
        "display_name": case_data.get("display_name", case_data["case_id"]),
    }

    # Check if institute exists in database
    try:
        institute_id = case_data["owner"]
    except KeyError as err:
        raise ConfigError("Case has to have a institute") from err
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" %
                             institute_id)
    case_obj["owner"] = institute_id

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get("collaborators") or [])
    collaborators.add(institute_id)
    case_obj["collaborators"] = list(collaborators)

    if case_data.get("assignee"):
        case_obj["assignees"] = [case_data["assignee"]]

    case_obj["smn_tsv"] = case_data.get("smn_tsv")

    # Individuals; errors from build_individual propagate unchanged
    ind_objs = [
        build_individual(individual)
        for individual in (case_data.get("individuals") or [])
    ]
    # sort the samples to put the affected individual first
    # (descending "phenotype" value, ties keep their input order)
    case_obj["individuals"] = sorted(
        ind_objs, key=lambda ind: -ind["phenotype"])

    now = datetime.now()
    case_obj["created_at"] = now
    case_obj["updated_at"] = now

    if case_data.get("suspects"):
        case_obj["suspects"] = case_data["suspects"]
    if case_data.get("causatives"):
        case_obj["causatives"] = case_data["causatives"]

    case_obj["synopsis"] = case_data.get("synopsis", "")

    case_obj["status"] = "inactive"
    case_obj["is_research"] = False
    case_obj["research_requested"] = False
    case_obj["rerun_requested"] = False

    analysis_date = case_data.get("analysis_date")
    if analysis_date:
        case_obj["analysis_date"] = analysis_date

    # We store some metadata and references about gene panels in 'panels'.
    # 'or []' also covers keys that are present but set to None.
    case_panels = case_data.get("gene_panels") or []
    default_panels = case_data.get("default_panels") or []
    panels = []

    for panel_name in case_panels:
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" %
                                 panel_name)
        panels.append({
            "panel_id": panel_obj["_id"],
            "panel_name": panel_obj["panel_name"],
            "display_name": panel_obj["display_name"],
            "version": panel_obj["version"],
            "updated_at": panel_obj["date"],
            "nr_genes": len(panel_obj["genes"]),
            "is_default": panel_name in default_panels,
        })

    case_obj["panels"] = panels

    case_obj["dynamic_gene_list"] = []

    # Meta data
    # TODO raise exception if invalid genome build (not "37" or "38") was used
    case_obj["genome_build"] = case_data.get("genome_build", "37")
    case_obj["genome_version"] = case_data.get("genome_version")

    if case_data.get("rank_model_version"):
        case_obj["rank_model_version"] = str(case_data["rank_model_version"])

    if case_data.get("sv_rank_model_version"):
        case_obj["sv_rank_model_version"] = str(
            case_data["sv_rank_model_version"])

    if case_data.get("rank_score_threshold"):
        case_obj["rank_score_threshold"] = float(
            case_data["rank_score_threshold"])

    # phenotype information
    phenotypes = []
    for phenotype in (case_data.get("phenotype_terms") or []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotypes.append(phenotype_obj)
    if phenotypes:
        case_obj["phenotype_terms"] = phenotypes

    # phenotype groups
    phenotype_groups = []
    for phenotype in (case_data.get("phenotype_groups") or []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotype_groups.append(phenotype_obj)
    if phenotype_groups:
        case_obj["phenotype_groups"] = phenotype_groups

    # Files
    case_obj["madeline_info"] = case_data.get("madeline_info")
    case_obj["chromograph_image_files"] = case_data.get(
        "chromograph_image_files")
    case_obj["chromograph_prefixes"] = case_data.get("chromograph_prefixes")

    if "multiqc" in case_data:
        case_obj["multiqc"] = case_data["multiqc"]
    vcf_files = case_data.get("vcf_files") or {}
    case_obj["vcf_files"] = vcf_files
    case_obj["delivery_report"] = case_data.get("delivery_report")

    case_obj["has_svvariants"] = bool(
        vcf_files.get("vcf_sv") or vcf_files.get("vcf_sv_research"))

    case_obj["has_strvariants"] = bool(vcf_files.get("vcf_str"))

    case_obj["is_migrated"] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj["track"] = case_data.get("track", "rare")

    return case_obj
Ejemplo n.º 7
0
def parse_case(config):
    """Parse case information from a case config.

    Args:
        config (dict): case config with detailed information

    Returns:
        dict: parsed case data. 'track' is forced to "cancer" when any
            cancer VCF is configured, 'analysis_date' is the result of
            passing the configured value through get_correct_date, and the
            rank model versions are strings ("" when missing or None).

    Raises:
        ConfigError: if 'owner' or 'family' is missing
        KeyError: if 'samples' is missing
        ValueError: if the configured madeline path does not exist
    """
    if "owner" not in config:
        raise ConfigError("A case has to have a owner")

    if "family" not in config:
        raise ConfigError("A case has to have a 'family'")

    individuals = parse_individuals(config["samples"])

    # A key that is present but empty yields None; str(None) would store the
    # truthy text "None" as a version, so None is mapped to "" exactly like
    # a missing key.
    rank_model_version = config.get("rank_model_version")
    sv_rank_model_version = config.get("sv_rank_model_version")

    case_data = {
        "owner": config["owner"],
        "collaborators": [config["owner"]],
        "case_id": config["family"],
        "display_name": config.get("family_name", config["family"]),
        "genome_build": config.get("human_genome_build"),
        "rank_model_version": (
            "" if rank_model_version is None else str(rank_model_version)),
        "rank_score_threshold": config.get("rank_score_threshold", 0),
        "sv_rank_model_version": (
            "" if sv_rank_model_version is None
            else str(sv_rank_model_version)),
        "analysis_date": config.get("analysis_date"),
        "individuals": individuals,
        "vcf_files": {
            "vcf_snv": config.get("vcf_snv"),
            "vcf_sv": config.get("vcf_sv"),
            "vcf_str": config.get("vcf_str"),
            "vcf_cancer": config.get("vcf_cancer"),
            "vcf_cancer_sv": config.get("vcf_cancer_sv"),
            "vcf_snv_research": config.get("vcf_snv_research"),
            "vcf_sv_research": config.get("vcf_sv_research"),
            "vcf_cancer_research": config.get("vcf_cancer_research"),
            "vcf_cancer_sv_research": config.get("vcf_cancer_sv_research"),
        },
        "smn_tsv": config.get("smn_tsv"),
        "default_panels": config.get("default_gene_panels", []),
        "gene_panels": config.get("gene_panels", []),
        "assignee": config.get("assignee"),
        "peddy_ped": config.get("peddy_ped"),
        "peddy_sex": config.get("peddy_sex"),
        "peddy_check": config.get("peddy_check"),
        "delivery_report": config.get("delivery_report"),
        "multiqc": config.get("multiqc"),
        "track": config.get("track", "rare"),
        "chromograph_image_files": config.get("chromograph_image_files"),
        "chromograph_prefixes": config.get("chromograph_prefixes"),
    }

    # add SMN info
    LOG.debug("Checking for SMN TSV..")
    if case_data["smn_tsv"]:
        LOG.info("Adding SMN info from %s.", case_data["smn_tsv"])
        add_smn_info_case(case_data)

    # add the pedigree figure, this is a xml file which is dumped in the db
    if "madeline" in config:
        mad_path = Path(config["madeline"])
        if not mad_path.exists():
            raise ValueError("madeline path not found: {}".format(mad_path))
        with mad_path.open("r") as in_handle:
            case_data["madeline_info"] = in_handle.read()

    # any configured cancer VCF overrides the configured track
    cancer_vcf_keys = (
        "vcf_cancer",
        "vcf_cancer_research",
        "vcf_cancer_sv",
        "vcf_cancer_sv_research",
    )
    if any(case_data["vcf_files"][key] for key in cancer_vcf_keys):
        case_data["track"] = "cancer"

    case_data["analysis_date"] = get_correct_date(
        case_data.get("analysis_date"))

    return case_data