Example #1
0
    def check_existing_data(self, case_obj, existing_case, institute_obj, update, keep_actions):
        """Validate a case to be loaded/reuploaded against the case already saved in database.

        When all checks pass and keep_actions is True, the evaluated variants of the
        case are collected so they can be propagated to the updated case.

        Args:
            case_obj(dict): case dictionary to be loaded/reuploaded
            existing_case(dict): a case with same _id or same display_name and institute_id as case_obj
            institute_obj(dict): institute dictionary
            update(bool): If existing case should be updated
            keep_actions(bool): If old evaluated variants should be kept when case is updated

        Returns:
            previous_evaluated_variants(list): list of variants evaluated in previous case.
                None if case is not already present in the database or keep_actions is False.

        Raises:
            IntegrityError: if _id, display name or individuals differ between the two
                cases, or if the case exists and update is False.
        """

        def _individual_keys(case):
            # ID, name and affected status identify an individual in the comparison
            return {
                (ind["individual_id"], ind["display_name"], ind["phenotype"])
                for ind in case.get("individuals")
            }

        # Nothing stored yet, nothing to compare against
        if existing_case is None:
            return None

        # Different _id: this happens whenever institute and case display name coincide
        if existing_case["_id"] != case_obj["_id"]:
            raise IntegrityError(
                f"A case with different _id ({existing_case['_id']} vs {case_obj['_id']}) and same display name ({case_obj['display_name']}) already exists for this institute."
            )

        if not update and existing_case:
            raise IntegrityError("Case %s already exists in database" % case_obj["_id"])

        # An updated case has to keep the display name of the existing case
        if existing_case["display_name"] != case_obj["display_name"]:
            raise IntegrityError("Updated case name doesn't match existing case name.")

        # Individuals have to be the same in both versions of the case
        existing_case_inds = _individual_keys(existing_case)
        case_inds = _individual_keys(case_obj)
        if case_inds != existing_case_inds:
            raise IntegrityError(
                f"Updated case individuals ({case_inds}) don't match individuals from existing case ({existing_case_inds}). Please either delete old case or modify updated case individuals."
            )

        if not keep_actions:
            return None

        # collect all variants with user actions for this case
        return list(self.evaluated_variants(case_obj["_id"], institute_obj["_id"]))
Example #2
0
def load_case(adapter, case_obj, update=False):
    """Load a case into the database

    A case that is not in the database yet is added.
    A case that already is in the database is only overwritten when
    'update' is 'True', otherwise an exception is raised.

    Args:
        adapter (MongoAdapter): connection to the database
        case_obj (dict): case object to persist to the database
        update(bool): If existing case should be updated

    Returns:
        case_obj(dict): The case object that was passed in

    Raises:
        IntegrityError: if the case exists and update is False
    """
    logger.info("Loading case %s into database", case_obj['display_name'])

    # Check if case exists in database
    existing_case = adapter.case(case_obj['_id'])

    if not existing_case:
        adapter.add_case(case_obj)
        return case_obj

    if not update:
        # Report the key that was used for the lookup above. The message used
        # to read 'case_id', which is a different key and raised KeyError
        # instead of IntegrityError whenever it was absent.
        raise IntegrityError("Case {0} already exists in database".format(
            case_obj['_id']))

    adapter.update_case(case_obj)
    return case_obj
Example #3
0
def load_delivery_report(adapter: MongoAdapter,
                         report_path: str,
                         case_id: str,
                         update: bool = False):
    """Attach a delivery report to a case and save the case

    A report that is already set on the case is only replaced when
    'update' is 'True'.

    Args:
        adapter     (MongoAdapter): Connection to the database
        report_path (string):       Path to delivery report
        case_id     (string):       Case identifier
        update      (bool):         If an existing report should be replaced

    Returns:
        updated_case(dict): whatever adapter.replace_case returns

    Raises:
        DataNotFoundError: if the case could not be found
        IntegrityError: if a report exists and update is False
    """
    case_obj = adapter.case(case_id=case_id)
    if case_obj is None:
        raise DataNotFoundError("no case found")

    # Refuse to overwrite an existing report unless explicitly asked to
    if not update and case_obj.get("delivery_report") is not None:
        raise IntegrityError("Existing report found, use update = True to "
                             "overwrite")

    _update_report_path(case_obj, report_path, "delivery_report")

    LOG.info("Saving report for case {} in database".format(case_obj["_id"]))
    updated_case = adapter.replace_case(case_obj)
    return updated_case
Example #4
0
def load_report(adapter, case_id, report_path, update=False):
    """Set the delivery report path on a case

    Args:
        adapter(scout.adapter.MongoAdapter)
        case_id(str)
        report_path(str)
        update(bool): If an existing delivery report may be overwritten

    Returns:
        updated_case(dict): the case document after the update

    Raises:
        IntegrityError: if the case does not exist
        ValidationError: if a delivery report exists and update is False
    """
    case_obj = adapter.case(case_id)
    if not case_obj:
        raise IntegrityError("Case {0} does not exist".format(case_id))

    has_report = case_obj.get('delivery_report')
    if has_report and not update:
        raise ValidationError(
            "Delivery report already exists for case {}".format(case_id))

    LOG.info("Set delivery report to %s", report_path)
    query = {'_id': case_id}
    change = {'$set': {'delivery_report': report_path}}
    return adapter.case_collection.find_one_and_update(
        query, change, return_document=pymongo.ReturnDocument.AFTER)
Example #5
0
def build_panel(panel_info, adapter):
    """Assemble a gene_panel object from raw panel information

        Args:
            panel_info(dict): A dictionary with panel information
            adapter (scout.adapter.MongoAdapter)

        Returns:
            panel_obj(dict)

        Raises:
            KeyError: if panel name, institute, version or date is missing
            IntegrityError: if the institute is not found by the adapter

    gene_panel = dict(
        panel_name = str, # required
        institute = str, # institute_id, required
        version = float, # required
        date = datetime, # required, stored as given in panel_info
        display_name = str, # default is panel_name
        genes = list, # one build_gene result per entry in panel_info['genes']
    )

    """
    name = panel_info.get('panel_name')
    if not name:
        raise KeyError("Panel has to have a name")

    logger.info("Building panel with name: {0}".format(name))

    try:
        institute_id = panel_info['institute']
    except KeyError:
        raise KeyError("Panel has to have a institute")

    # The panel has to belong to an institute that is known to the database
    if adapter.institute(institute_id) is None:
        raise IntegrityError("Institute %s could not be found" % institute_id)

    version = float(panel_info['version'])

    try:
        panel_date = panel_info['date']
    except KeyError:
        raise KeyError("Panel has to have a date")

    display_name = panel_info.get('display_name', panel_info['panel_name'])

    genes = [
        build_gene(gene_info, adapter)
        for gene_info in panel_info.get('genes', [])
    ]

    return dict(
        panel_name=name,
        institute=institute_id,
        version=version,
        date=panel_date,
        display_name=display_name,
        genes=genes,
    )
Example #6
0
    def _add_case(self, case_obj):
        """Insert a new case into the case collection

         Args:
             case_obj(Case)

         Returns:
             the result of case_collection.insert_one

         Raises:
             IntegrityError: if a case with the same _id is already stored
        """
        case_id = case_obj["_id"]
        already_stored = self.case(case_id)
        if already_stored:
            raise IntegrityError("Case %s already exists in database" % case_id)

        return self.case_collection.insert_one(case_obj)
Example #7
0
    def add_case(self, case_obj):
        """Insert a new case into the case collection

            Args:
                case_obj(Case)

            Returns:
                the result of case_collection.insert_one

            Raises:
                IntegrityError: if a case with the same case_id is already stored
        """
        case_id = case_obj['case_id']
        logger.info("Adding case %s to database" % case_id)

        already_stored = self.case(case_id)
        if already_stored:
            raise IntegrityError("Case %s already exists in database" % case_id)

        return self.case_collection.insert_one(case_obj)
Example #8
0
    def load_exon_bulk(self, exon_objs):
        """Load a bulk of exon objects to the database

        Arguments:
            exon_objs(iterable(scout.models.hgnc_exon))

        Returns:
            result: whatever exon_collection.insert_many returns

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
                or a BulkWriteError
        """
        try:
            # Insert the exons that were passed in. This used to reference
            # 'transcript_objs', a name that does not exist in this method,
            # so every call failed with a NameError.
            result = self.exon_collection.insert_many(exon_objs)
        except (DuplicateKeyError, BulkWriteError) as err:
            raise IntegrityError(err) from err

        return result
Example #9
0
    def load_transcript_bulk(self, transcript_objs):
        """Insert many transcript objects into the transcript collection at once

        Arguments:
            transcript_objs(iterable(scout.models.hgnc_transcript))

        Returns:
            whatever transcript_collection.insert_many returns

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
                or a BulkWriteError
        """
        LOG.info("Loading transcript bulk")
        try:
            insert_result = self.transcript_collection.insert_many(transcript_objs)
        except (DuplicateKeyError, BulkWriteError) as write_error:
            # Hand database write failures to the caller as IntegrityError
            raise IntegrityError(write_error)
        else:
            return insert_result
Example #10
0
def update_panel(
    adapter,
    panel_name,
    panel_version,
    new_version=None,
    new_date=None,
    new_maintainer=None,
):
    """Update a gene panel and the copies of it that are embedded in cases

    The panel document itself is updated first. Cases that refer to the panel
    are then updated with the new version and/or date.

    Args:
        adapter(scout.adapter.MongoAdapter)
        panel_name(str): Unique name for a gene panel
        panel_version(float)
        new_version(float)
        new_date(datetime.datetime)
        new_maintainer(list(user_id))

    Returns:
        updated_panel(scout.models.GenePanel): The updated gene panel object

    Raises:
        IntegrityError: if the panel with the given version does not exist
    """
    panel_obj = adapter.gene_panel(panel_name, panel_version)
    if not panel_obj:
        raise IntegrityError("Panel %s version %s does not exist" %
                             (panel_name, panel_version))

    updated_panel = adapter.update_panel(panel_obj, new_version, new_date,
                                         new_maintainer)

    # Not used further down; the lookup fails right here if the returned
    # panel carries no "_id", before any case is modified
    panel_id = updated_panel["_id"]

    # Collect the changes for the panels embedded in the affected cases.
    # The maintainer is not stored on the embedded versions.
    embedded_changes = {}
    if new_version:
        embedded_changes["panels.$.version"] = updated_panel["version"]
    if new_date:
        embedded_changes["panels.$.updated_at"] = updated_panel["date"]

    if not embedded_changes:
        return updated_panel

    update = {"$set": embedded_changes}
    LOG.info("Updating affected cases with {0}".format(update))

    affected_cases = {"panels": {"$elemMatch": {"panel_name": panel_name}}}
    adapter.case_collection.update_many(affected_cases, update)

    return updated_panel
Example #11
0
    def load_variant(self, variant_obj):
        """Load a variant object

        Args:
            variant_obj(dict)

        Returns:
            result: whatever variant_collection.insert_one returns

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
        """
        # LOG.debug("Loading variant %s", variant_obj['_id'])
        try:
            result = self.variant_collection.insert_one(variant_obj)
        except DuplicateKeyError as err:
            # Exceptions do not interpolate logging-style arguments, the
            # message has to be formatted before it is handed over
            raise IntegrityError(
                "Variant %s already exists in database" % variant_obj['_id']
            ) from err
        return result
Example #12
0
    def load_disease_term(self, disease_obj):
        """Load a disease term into the database

        Args:
            disease_obj(dict)

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
        """
        log.debug("Loading disease term %s into database", disease_obj['_id'])
        try:
            self.disease_term_collection.insert_one(disease_obj)
        except DuplicateKeyError as err:
            # The message holds a printf-style placeholder. str.format()
            # leaves '%s' untouched, so interpolate with '%' instead.
            raise IntegrityError(
                "Disease term %s already exists in database" % disease_obj['_id']
            ) from err

        log.debug("Disease term saved")
Example #13
0
    def load_hpo_term(self, hpo_obj):
        """Add a hpo object

        Arguments:
            hpo_obj(dict)

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
        """
        log.debug("Loading hpo term %s into database", hpo_obj['_id'])
        try:
            self.hpo_term_collection.insert_one(hpo_obj)
        except DuplicateKeyError as err:
            # The message holds a printf-style placeholder. str.format()
            # leaves '%s' untouched, so interpolate with '%' instead.
            raise IntegrityError(
                "Hpo term %s already exists in database" % hpo_obj['_id']
            ) from err
        log.debug("Hpo term saved")
Example #14
0
    def add_gene_panel(self, panel_obj):
        """Insert a new gene panel into the panel collection

            Args:
                panel_obj(dict)

            Raises:
                IntegrityError: if a panel with the same name and version
                    is already stored
        """
        name, version = panel_obj['panel_name'], panel_obj['version']

        already_stored = self.gene_panel(name, version)
        if already_stored:
            raise IntegrityError("Panel {0} with version {1} already"
                                 " exist in database".format(name, version))

        LOG.info("loading panel {0}, version {1} to database".format(
            name, version))
        self.panel_collection.insert_one(panel_obj)
        LOG.debug("Panel saved")
Example #15
0
    def load_hpo_bulk(self, hpo_bulk):
        """Insert many hpo terms into the hpo term collection at once

        Arguments:
            hpo_bulk(list(scout.models.HpoTerm))

        Returns:
            result: whatever hpo_term_collection.insert_many returns

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
                or a BulkWriteError
        """
        LOG.debug("Loading hpo bulk")

        try:
            insert_result = self.hpo_term_collection.insert_many(hpo_bulk)
        except (DuplicateKeyError, BulkWriteError) as write_error:
            # Hand database write failures to the caller as IntegrityError
            raise IntegrityError(write_error)
        else:
            return insert_result
Example #16
0
    def load_managed_variant(self, managed_variant_obj):
        """Load a managed variant object

        Args:
            managed_variant_obj(ManagedVariant)

        Returns:
            inserted_id: the inserted_id of the insert result

        Raises:
            IntegrityError: if the insert fails with a DuplicateKeyError
        """
        try:
            result = self.managed_variant_collection.insert_one(
                managed_variant_obj)
        except DuplicateKeyError as err:
            # Exceptions do not interpolate logging-style arguments, the
            # message has to be formatted before it is handed over
            raise IntegrityError(
                "Variant %s already exists in database"
                % managed_variant_obj["display_id"]
            ) from err

        return result.inserted_id
Example #17
0
    def load_hgnc_bulk(self, gene_objs):
        """Insert many hgnc gene objects into the hgnc collection at once

        Raises IntegrityError if the insert fails with a DuplicateKeyError
        or a BulkWriteError

        Args:
            gene_objs(iterable(scout.models.hgnc_gene)): has to support len()

        Returns:
            result (pymongo.results.InsertManyResult)
        """
        nr_genes = len(gene_objs)
        LOG.info("Loading gene bulk with length %s", nr_genes)

        try:
            insert_result = self.hgnc_collection.insert_many(gene_objs)
        except (DuplicateKeyError, BulkWriteError) as write_error:
            # Hand database write failures to the caller as IntegrityError
            raise IntegrityError(write_error)
        else:
            return insert_result
Example #18
0
def update_panel(adapter,
                 panel_name,
                 panel_version,
                 new_version=None,
                 new_date=None):
    """Update a gene panel in the database

    We need to update the actual gene panel and then all cases that refers to the panel.

    Args:
        adapter(scout.adapter.MongoAdapter)
        panel_name(str): Unique name for a gene panel
        panel_version(float)
        new_version(float)
        new_date(datetime.datetime)

    Returns:
        updated_panel(scout.models.GenePanel): The updated gene panel object

    Raises:
        IntegrityError: if the panel with the given version does not exist
    """
    panel_obj = adapter.gene_panel(panel_name, panel_version)

    if not panel_obj:
        raise IntegrityError("Panel %s version %s does not exist" %
                             (panel_name, panel_version))

    updated_panel = adapter.update_panel(panel_obj, new_version, new_date)

    # We need to alter the embedded panels in all affected cases
    update = {'$set': {}}
    if new_version:
        update['$set']['panels.$.version'] = updated_panel['version']
    if new_date:
        update['$set']['panels.$.updated_at'] = updated_panel['date']

    # Only touch the cases when there is something to change. An update
    # document with an empty '$set' is rejected by older MongoDB servers
    # and would be a pointless write on the others.
    if update['$set']:
        LOG.info('Updating affected cases with %s', update)

        query = {'panels': {'$elemMatch': {'panel_name': panel_name}}}
        adapter.case_collection.update_many(query, update)

    return updated_panel
Example #19
0
    def add_gene_panel(self, panel_obj):
        """Insert a new gene panel into the panel collection

        Args:
            panel_obj(dict)

        Returns:
            inserted_id: the inserted_id of the insert result

        Raises:
            IntegrityError: if a panel with the same name and version
                is already stored
        """
        name = panel_obj["panel_name"]
        version = panel_obj["version"]
        # The display name is only used for logging, fall back to the panel name
        shown_name = panel_obj.get("display_name", name)

        already_stored = self.gene_panel(name, version)
        if already_stored:
            raise IntegrityError("Panel {0} with version {1} already"
                                 " exist in database".format(name, version))

        LOG.info("loading panel %s, version %s to database", shown_name,
                 version)
        nr_genes = len(panel_obj.get("genes", []))
        LOG.info("Nr genes in panel: %s", nr_genes)

        insert_result = self.panel_collection.insert_one(panel_obj)
        LOG.debug("Panel saved")
        return insert_result.inserted_id
Example #20
0
    def add_user(self, user_obj):
        """Insert a user into the user collection

            The email is used as _id when the user object has none.

            Args:
                user_obj(scout.models.User): A dictionary with user information

            Returns:
                user_info(dict): the user object that was passed in, with '_id' set

            Raises:
                IntegrityError: if the insert fails with a DuplicateKeyError
        """
        email = user_obj['email']
        LOG.info("Adding user %s to the database", email)

        if '_id' not in user_obj:
            user_obj['_id'] = email

        try:
            self.user_collection.insert_one(user_obj)
        except DuplicateKeyError:
            raise IntegrityError("User {} already exists in database".format(email))
        else:
            LOG.debug("User inserted")

        return user_obj
Example #21
0
    def add_user(self, user_info):
        """Insert a user into the user collection

            The email is used as _id when the user has none and the
            creation time is stamped on the user before it is inserted.

            Args:
                user_info(dict): A dictionary with user information

            Returns:
                user_info(dict): the dictionary that was passed in, with
                    '_id' and 'created_at' set

            Raises:
                IntegrityError: if the insert fails with a DuplicateKeyError
        """
        email = user_info['email']
        log.info("Adding user %s to the database", email)

        if '_id' not in user_info:
            user_info['_id'] = email
        user_info['created_at'] = datetime.datetime.now()

        try:
            self.user_collection.insert_one(user_info)
        except DuplicateKeyError:
            raise IntegrityError("User {} already exists in database".format(email))
        else:
            log.debug("User inserted")

        return user_info
Example #22
0
    def add_institute(self, institute_obj):
        """Insert a new institute into the institute collection

        Args:
            institute_obj(Institute)

        Raises:
            IntegrityError: if an institute with the same internal_id
                is already stored
        """
        internal_id, display_name = (
            institute_obj["internal_id"],
            institute_obj["display_name"],
        )

        # Check if institute already exists
        already_stored = self.institute(institute_id=internal_id)
        if already_stored:
            raise IntegrityError(
                "Institute {0} already exists in database".format(
                    display_name))

        LOG.info("Adding institute with internal_id: {0} and "
                 "display_name: {1}".format(internal_id, display_name))

        ##TODO check if insert info was ok
        self.institute_collection.insert_one(institute_obj)
        LOG.info("Institute saved")
Example #23
0
    def add_gene_panel(self, panel_obj, replace=False):
        """Insert a gene panel, or replace a stored panel of the same version

        Args:
            panel_obj(dict)
            replace(bool), if True, replace panel data in database

        Returns:
            the _id of the replaced panel document, or the inserted_id of
            the newly inserted one

        Raises:
            IntegrityError: if the panel version is already stored and
                replace is False
        """
        panel_name = panel_obj["panel_name"]
        panel_version = panel_obj["version"]
        # The display name is only used for logging, fall back to the panel name
        shown_name = panel_obj.get("display_name", panel_name)

        LOG.info("loading panel %s, version %s to database", shown_name,
                 panel_version)
        nr_genes = len(panel_obj.get("genes", []))
        LOG.info("Nr genes in panel: %s", nr_genes)

        old_panel = self.gene_panel(panel_name, panel_version)

        if not old_panel:
            # No panel with this version yet, create a new panel document
            insert_result = self.panel_collection.insert_one(panel_obj)
            LOG.debug("Panel saved")
            return insert_result.inserted_id

        if replace is False:
            raise IntegrityError("Panel {0} with version {1} already"
                                 " exist in database".format(
                                     panel_name, panel_version))

        # Same version of this panel exists, but should be replaced by new panel document
        LOG.warning(
            f"Panel {panel_name} v.{panel_version} already exists. Replacing it with new data"
        )
        replaced_panel = self.panel_collection.find_one_and_replace(
            old_panel,
            panel_obj,
            return_document=pymongo.ReturnDocument.AFTER)
        LOG.debug("Panel replaced")
        return replaced_panel["_id"]
Example #24
0
def build_case(case_data, adapter):
    """Build the case dictionary that is to be inserted to the database

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        ConfigError: if case_data has no 'owner'
        IntegrityError: if the owner institute or one of the gene panels
            is not found in the database

    Keys of the case object, optional keys are only set when case_data
    provides a value for them:

        _id = str, # case_data['case_id']
        display_name = str, # If not display name use case_id
        owner = str, # institute id, required
        collaborators = list, # List of institute_ids, owner included
        assignees = list, # optional, holds case_data['assignee']
        individuals = list, # build_individual results, highest phenotype first
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # optional
        causatives = list, # optional
        synopsis = str, # default=''
        status = str, # always 'inactive'
        is_research = bool, # always False
        research_requested = bool, # always False
        rerun_requested = bool, # always False
        analysis_date = datetime, # optional
        panels = list, # list of dictionaries with panel information
        dynamic_gene_list = list, # always empty
        genome_build = str, # default='37'
        genome_version = float,
        rank_model_version = str, # optional
        sv_rank_model_version = str, # optional
        rank_score_threshold = float, # optional
        phenotype_terms = list, # optional, build_phenotype results
        phenotype_groups = list, # optional, build_phenotype results
        madeline_info = str,
        chromograph_image_files,
        chromograph_prefixes,
        multiqc = str, # only if 'multiqc' is a key of case_data
        vcf_files = dict, # default={}
        delivery_report,
        has_svvariants = bool,
        has_strvariants = bool,
        is_migrated = bool, # always False
        track = str, # default='rare'
    """
    case_id = case_data['case_id']
    log.info("build case with id: {0}".format(case_id))
    case_obj = {
        '_id': case_id,
        'display_name': case_data.get('display_name', case_id),
    }

    # The owner institute is mandatory and has to exist in database
    try:
        owner = case_data['owner']
    except KeyError:
        raise ConfigError("Case has to have a institute")
    if not adapter.institute(owner):
        raise IntegrityError("Institute %s not found in database" % owner)
    case_obj['owner'] = owner

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get('collaborators', []))
    collaborators.add(owner)
    case_obj['collaborators'] = list(collaborators)

    assignee = case_data.get('assignee')
    if assignee:
        case_obj['assignees'] = [assignee]

    # Individuals, errors from build_individual are passed on unchanged
    ## TODO add some action when an individual can not be built
    ind_objs = [
        build_individual(individual)
        for individual in case_data.get('individuals', [])
    ]
    # sort the samples to put the affected individual first
    case_obj['individuals'] = sorted(
        ind_objs, key=lambda ind: -ind['phenotype'])

    now = datetime.now()
    case_obj['created_at'] = now
    case_obj['updated_at'] = now

    # Variant references are only stored when there are any
    for key in ('suspects', 'causatives'):
        if case_data.get(key):
            case_obj[key] = case_data[key]

    case_obj['synopsis'] = case_data.get('synopsis', '')

    # Every new case starts out with the same state
    case_obj.update(
        status='inactive',
        is_research=False,
        research_requested=False,
        rerun_requested=False,
    )

    analysis_date = case_data.get('analysis_date')
    if analysis_date:
        case_obj['analysis_date'] = analysis_date

    # We store some metadata and references about gene panels in 'panels'
    default_panels = case_data.get('default_panels', [])
    panels = []
    for panel_name in case_data.get('gene_panels', []):
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" %
                                 panel_name)
        panels.append({
            'panel_id': panel_obj['_id'],
            'panel_name': panel_obj['panel_name'],
            'display_name': panel_obj['display_name'],
            'version': panel_obj['version'],
            'updated_at': panel_obj['date'],
            'nr_genes': len(panel_obj['genes']),
            'is_default': panel_name in default_panels,
        })
    case_obj['panels'] = panels

    case_obj['dynamic_gene_list'] = []

    # Meta data
    ##TODO raise exception if invalid genome build was used
    case_obj['genome_build'] = case_data.get('genome_build', '37')
    case_obj['genome_version'] = case_data.get('genome_version')

    # Rank model versions are stored as strings
    for key in ('rank_model_version', 'sv_rank_model_version'):
        if case_data.get(key):
            case_obj[key] = str(case_data[key])

    if case_data.get('rank_score_threshold'):
        case_obj['rank_score_threshold'] = float(
            case_data['rank_score_threshold'])

    # phenotype information and phenotype groups are built the same way,
    # terms that build_phenotype gives no object for are left out
    for key in ('phenotype_terms', 'phenotype_groups'):
        built = [
            phenotype_obj
            for phenotype_obj in (build_phenotype(phenotype, adapter)
                                  for phenotype in case_data.get(key, []))
            if phenotype_obj
        ]
        if built:
            case_obj[key] = built

    # Files
    for key in ('madeline_info', 'chromograph_image_files',
                'chromograph_prefixes'):
        case_obj[key] = case_data.get(key)

    if 'multiqc' in case_data:
        case_obj['multiqc'] = case_data.get('multiqc')

    vcf_files = case_data.get('vcf_files', {})
    case_obj['vcf_files'] = vcf_files
    case_obj['delivery_report'] = case_data.get('delivery_report')

    # The variant type flags follow from which vcf files are given
    case_obj['has_svvariants'] = bool(
        vcf_files.get('vcf_sv') or vcf_files.get('vcf_sv_research'))
    case_obj['has_strvariants'] = bool(vcf_files.get('vcf_str'))

    case_obj['is_migrated'] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj['track'] = case_data.get('track', 'rare')

    return case_obj
Example #25
0
    def update_institute(
        self,
        internal_id,
        sanger_recipient=None,
        sanger_recipients=None,
        loqusdb_id=None,
        coverage_cutoff=None,
        frequency_cutoff=None,
        display_name=None,
        remove_sanger=None,
        phenotype_groups=None,
        group_abbreviations=None,
        add_groups=None,
        sharing_institutes=None,
        cohorts=None,
    ):
        """Update the information for an institute

        Args:
            internal_id(str): The internal institute id
            sanger_recipient(str): Email adress to add for sanger order
            sanger_recipients(list): A list of sanger recipients email addresses
            loqusdb_id(str): identify loqusdb setting to use
            coverage_cutoff(int): Update coverage cutoff
            frequency_cutoff(float): New frequency cutoff
            display_name(str): New display name
            remove_sanger(str): Email adress for sanger user to be removed
            phenotype_groups(iterable(str)): New phenotype groups
            group_abbreviations(iterable(str))
            add_groups(bool): If groups should be added. If False replace groups
            sharing_institutes(list(str)): Other institutes to share cases with
            cohorts(list(str)): patient cohorts

        Returns:
            updated_institute(dict): the institute after the update, or the
                institute as fetched when there was nothing to update.
                If one of the phenotype_groups terms is not found, an error
                message (str) is returned instead and nothing is written.

        Raises:
            IntegrityError: if the institute or the sanger_recipient user
                does not exist in database
        """
        add_groups = add_groups or False
        institute_obj = self.institute(internal_id)
        if not institute_obj:
            raise IntegrityError(
                "Institute {} does not exist in database".format(internal_id))

        updates = {"$set": {}}
        updated_institute = institute_obj

        if sanger_recipient:
            user_obj = self.user(sanger_recipient)
            if not user_obj:
                raise IntegrityError(
                    "user {} does not exist in database".format(
                        sanger_recipient))

            LOG.info("Updating sanger recipients for institute: {0} with {1}".
                     format(internal_id, sanger_recipient))
            updates["$push"] = {"sanger_recipients": sanger_recipient}

        if sanger_recipients is not None:
            updates["$set"][
                "sanger_recipients"] = sanger_recipients  # can be empty list

        if remove_sanger:
            LOG.info(
                "Removing sanger recipient {0} from institute: {1}".format(
                    remove_sanger, internal_id))
            updates["$pull"] = {"sanger_recipients": remove_sanger}

        if coverage_cutoff:
            LOG.info(
                "Updating coverage cutoff for institute: {0} to {1}".format(
                    internal_id, coverage_cutoff))
            updates["$set"]["coverage_cutoff"] = coverage_cutoff

        if frequency_cutoff:
            LOG.info(
                "Updating frequency cutoff for institute: {0} to {1}".format(
                    internal_id, frequency_cutoff))
            updates["$set"]["frequency_cutoff"] = frequency_cutoff

        if display_name:
            LOG.info("Updating display name for institute: {0} to {1}".format(
                internal_id, display_name))
            updates["$set"]["display_name"] = display_name

        if phenotype_groups is not None:
            if group_abbreviations:
                group_abbreviations = list(group_abbreviations)
            existing_groups = {}
            if add_groups:
                # Work on a copy. The groups are extended below, and without
                # the copy that would modify the shared PHENOTYPE_GROUPS
                # default or the mapping held by the fetched institute.
                existing_groups = dict(
                    institute_obj.get("phenotype_groups", PHENOTYPE_GROUPS)
                    or {})
            for i, hpo_term in enumerate(phenotype_groups):
                hpo_obj = self.hpo_term(hpo_term)
                if not hpo_obj:
                    # Unknown term: report it to the caller as a message,
                    # no update is written in this case
                    return "Term {} does not exist in database".format(
                        hpo_term)
                hpo_id = hpo_obj["hpo_id"]
                description = hpo_obj["description"]
                abbreviation = None
                if group_abbreviations:
                    # Abbreviations are matched to the terms by position
                    abbreviation = group_abbreviations[i]
                existing_groups[hpo_term] = {
                    "name": description,
                    "abbr": abbreviation
                }
            updates["$set"]["phenotype_groups"] = existing_groups

        if sharing_institutes is not None:
            updates["$set"]["collaborators"] = sharing_institutes

        if cohorts is not None:
            updates["$set"]["cohorts"] = cohorts

        if loqusdb_id is not None:
            LOG.info("Updating loqusdb id for institute: %s to %s",
                     internal_id, loqusdb_id)
            updates["$set"]["loqusdb_id"] = loqusdb_id

        # Only write to the database when at least one change was requested
        if updates["$set"].keys() or updates.get("$push") or updates.get(
                "$pull"):
            updates["$set"]["updated_at"] = datetime.now()
            updated_institute = self.institute_collection.find_one_and_update(
                {"_id": internal_id},
                updates,
                return_document=pymongo.ReturnDocument.AFTER,
            )

            LOG.info("Institute updated")

        return updated_institute
Example #26
0
    def load_case(self, config_data, update=False, keep_actions=True):
        """Load a case and its clinical variants into the database.

        The owner institute has to exist already. If a case is found under the
        old "<owner>-<display_name>" id it is moved to the new id and the load
        is treated as an update. When updating, the stored clinical variants of
        every category that has a VCF file are deleted before the new ones are
        loaded. Errors raised while loading variants are logged as warnings and
        the case document is still saved.

        Args:
            config_data(dict): A dictionary with all the necessary information
            update(bool): If existing case should be updated
            keep_actions(bool): Attempt transfer of existing case user actions to new vars

        Returns:
            case_obj(dict)

        Raises:
            IntegrityError: if the owner institute is not found in the database
                (check_existing_data may raise it as well)
        """
        # The owner institute is a prerequisite for everything below
        institute_obj = self.institute(config_data["owner"])
        if not institute_obj:
            raise IntegrityError("Institute '%s' does not exist in database" % config_data["owner"])

        case_obj = build_case(config_data, self)

        # Look for the same case stored under the old style case id
        legacy_id = "-".join([case_obj["owner"], case_obj["display_name"]])
        legacy_case = self.case(legacy_id)

        # Fetched before anything is modified, to keep sanger order and validation status
        sanger_variants = self.case_sanger_variants(case_obj["_id"])

        build = str(config_data.get("genome_build", 37))

        if legacy_case:
            LOG.info(
                "Update case id for existing case: %s -> %s",
                legacy_id,
                case_obj["_id"],
            )
            self.update_caseid(legacy_case, case_obj["_id"])
            update = True

        # Find the stored case by _id first, then by institute + display name
        existing_case = self.case(case_id=case_obj["_id"])
        if not existing_case:
            existing_case = self.case(
                institute_id=institute_obj["_id"], display_name=case_obj["display_name"]
            )

        # Validates the new case against the stored one; the return value is the
        # info to be propagated to an updated case
        evaluated_variants = self.check_existing_data(
            case_obj, existing_case, institute_obj, update, keep_actions
        )

        if keep_actions and existing_case:
            # collect all variants with user actions for this case
            evaluated_variants = list(self.evaluated_variants(case_obj["_id"], case_obj["owner"]))

        # (key in case_obj["vcf_files"], variant category); all of them are clinical
        clinical_vcfs = (
            ("vcf_snv", "snv"),
            ("vcf_sv", "sv"),
            ("vcf_cancer", "cancer"),
            ("vcf_cancer_sv", "cancer_sv"),
            ("vcf_str", "str"),
        )

        try:
            for file_key, category in clinical_vcfs:
                if not case_obj["vcf_files"].get(file_key):
                    LOG.debug("didn't find {}, skipping".format(file_key))
                    continue

                if update:
                    self.delete_variants(
                        case_id=case_obj["_id"],
                        variant_type="clinical",
                        category=category,
                    )

                # custom images from the config file, if any exist for this category
                all_images = case_obj.get("custom_images")
                if all_images and category in all_images:
                    category_images = all_images[category]
                else:
                    category_images = None

                self.load_variants(
                    case_obj=case_obj,
                    variant_type="clinical",
                    category=category,
                    build=build,
                    rank_threshold=case_obj.get("rank_score_threshold", 5),
                    custom_images=category_images,
                )
        except (IntegrityError, ValueError, ConfigError, KeyError) as error:
            LOG.warning(error)

        if not existing_case:
            LOG.info("Loading case %s into database", case_obj["display_name"])
            self._add_case(case_obj)
            return case_obj

        # Reloaded cases are reset: no pending rerun, active/archived become inactive
        case_obj["rerun_requested"] = False
        if case_obj["status"] in ("active", "archived"):
            case_obj["status"] = "inactive"

        self.update_case(case_obj)

        # update Sanger status for the new inserted variants
        self.update_case_sanger_variants(institute_obj, case_obj, sanger_variants)

        if keep_actions and evaluated_variants:
            self.update_variant_actions(institute_obj, case_obj, evaluated_variants)

        return case_obj
Example #27
0
    def load_case(self, config_data, update=False):
        """Load a case and its clinical variants into the database.

        The owner institute has to exist already. If a case is found under the
        old "<owner>-<display_name>" id it is moved to the new id and the load
        is treated as an update. When updating, the stored clinical variants of
        every category that has a VCF file are deleted before the new ones are
        loaded. Errors raised while loading variants are logged as warnings and
        the case document is still saved.

        Args:
            config_data(dict): A dictionary with all the necessary information
            update(bool): If existing case should be updated

        Returns:
            case_obj(dict)

        Raises:
            IntegrityError: if the owner institute is not found in the database,
                or the case already exists and update is False
        """
        # The owner institute is a prerequisite for everything below
        institute_obj = self.institute(config_data["owner"])
        if not institute_obj:
            raise IntegrityError("Institute '%s' does not exist in database" %
                                 config_data["owner"])

        # Parse the config, then build the case object from the parsed data
        case_obj = build_case(parse_case(config=config_data), self)

        # Look for the same case stored under the old style case id
        legacy_id = "-".join([case_obj["owner"], case_obj["display_name"]])
        legacy_case = self.case(legacy_id)

        # Fetched before anything is modified, to keep sanger order and validation status
        sanger_variants = self.case_sanger_variants(case_obj["_id"])

        if legacy_case:
            LOG.info(
                "Update case id for existing case: %s -> %s",
                legacy_id,
                case_obj["_id"],
            )
            self.update_caseid(legacy_case, case_obj["_id"])
            update = True

        # An existing case may only be overwritten when an update was asked for
        existing_case = self.case(case_obj["_id"])
        if existing_case and not update:
            raise IntegrityError("Case %s already exists in database" %
                                 case_obj["_id"])

        # (key in case_obj["vcf_files"], variant category); all of them are clinical
        clinical_vcfs = (
            ("vcf_snv", "snv"),
            ("vcf_sv", "sv"),
            ("vcf_cancer", "cancer"),
            ("vcf_cancer_sv", "cancer_sv"),
            ("vcf_str", "str"),
        )

        try:
            for file_key, category in clinical_vcfs:
                if not case_obj["vcf_files"].get(file_key):
                    LOG.debug("didn't find {}, skipping".format(file_key))
                    continue

                if update:
                    self.delete_variants(
                        case_id=case_obj["_id"],
                        variant_type="clinical",
                        category=category,
                    )
                self.load_variants(
                    case_obj=case_obj,
                    variant_type="clinical",
                    category=category,
                    rank_threshold=case_obj.get("rank_score_threshold", 5),
                )
        except (IntegrityError, ValueError, ConfigError, KeyError) as error:
            LOG.warning(error)

        if not (existing_case and update):
            LOG.info("Loading case %s into database", case_obj["display_name"])
            self._add_case(case_obj)
            return case_obj

        # Reloaded cases are reset: no pending rerun, active/archived become inactive
        case_obj["rerun_requested"] = False
        if case_obj["status"] in ("active", "archived"):
            case_obj["status"] = "inactive"

        self.update_case(case_obj)

        # update Sanger status for the new inserted variants
        self.update_case_sanger_variants(institute_obj, case_obj, sanger_variants)

        return case_obj
Example #28
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    The owner institute, the gene panels and the HPO terms are looked up
    through the adapter. Gene panels and phenotype terms that are not found
    are skipped with a warning.

    Side effect: if the case lists cohorts that are not registered on the
    owner institute, the institute document is updated with the new cohorts.

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        KeyError: if case_data has no "case_id"
        ConfigError: if case_data has no "owner"
        IntegrityError: if the owner institute is not found in the database

    dict(
        _id = str, # required=True, unique, taken from case_data["case_id"]
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # holds the assignee (_id of a user), only set when given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by there _id
        causatives = list, # List of variants referred by there _id

        synopsis = str, # The synopsis is a text blob
        status = str, # default='inactive', choices=STATUS
        is_research = bool, # default=False
        research_requested = bool, # default=False
        rerun_requested = bool, # default=False
        cohorts = list, # list of strings
        analysis_date = datetime,
        analyses = list, # list of dict

        # default_panels specifies which panels that should be shown when
        # the case is opened
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = list, # List of genes

        genome_build = str, # This should be 37 or 38

        rank_model_version = str,
        rank_score_threshold = float, # only set when a truthy value is given

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        multiqc = str, # path to dir with multiqc information

        cnv_report = str, # path to file with cnv report
        coverage_qc_report = str, # path to file with coverage and qc report
        gene_fusion_report = str, # path to the gene fusions report
        gene_fusion_report_research = str, # path to the research gene fusions report

        vcf_files = dict, # A dictionary with vcf files

        diagnosis_phenotypes = list, # List of references to diseases
        diagnosis_genes = list, # List of references to genes

        has_svvariants = bool, # default=False

        is_migrated = bool # default=False

    )
    """
    LOG.info("build case with id: {0}".format(case_data["case_id"]))
    case_obj = {
        "_id": case_data["case_id"],
        "display_name": case_data.get("display_name", case_data["case_id"]),
    }

    # Check if institute exists in database
    try:
        institute_id = case_data["owner"]
    except KeyError as err:
        # Chain the KeyError so the missing key stays visible in the traceback
        raise ConfigError("Case has to have a institute") from err
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" %
                             institute_id)
    case_obj["owner"] = institute_id

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get("collaborators", []))
    collaborators.add(institute_id)
    case_obj["collaborators"] = list(collaborators)

    if case_data.get("assignee"):
        case_obj["assignees"] = [case_data["assignee"]]

    case_obj["smn_tsv"] = case_data.get("smn_tsv")

    # Individuals; any error from build_individual propagates to the caller
    ind_objs = [
        build_individual(individual)
        for individual in case_data.get("individuals", [])
    ]
    # sort the samples to put the affected individual first
    case_obj["individuals"] = sorted(ind_objs,
                                     key=lambda ind: -ind["phenotype"])

    now = datetime.now()
    case_obj["created_at"] = now
    case_obj["updated_at"] = now

    if case_data.get("suspects"):
        case_obj["suspects"] = case_data["suspects"]
    if case_data.get("causatives"):
        case_obj["causatives"] = case_data["causatives"]

    case_obj["synopsis"] = case_data.get("synopsis", "")

    case_obj["status"] = "inactive"
    case_obj["is_research"] = False
    case_obj["research_requested"] = False
    case_obj["rerun_requested"] = False

    case_obj["lims_id"] = case_data.get("lims_id", "")

    analysis_date = case_data.get("analysis_date")
    if analysis_date:
        case_obj["analysis_date"] = analysis_date

    # We store some metadata and references about gene panels in 'panels'
    default_panels = case_data.get("default_panels", [])
    panels = []

    for panel_name in case_data.get("gene_panels", []):
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            LOG.warning(
                "Panel %s does not exist in database and will not be saved in case document.",
                panel_name)
            continue
        panels.append({
            "panel_id": panel_obj["_id"],
            "panel_name": panel_obj["panel_name"],
            "display_name": panel_obj["display_name"],
            "version": panel_obj["version"],
            "updated_at": panel_obj["date"],
            "nr_genes": len(panel_obj["genes"]),
            "is_default": panel_name in default_panels,
        })

    case_obj["panels"] = panels

    case_obj["dynamic_gene_list"] = []

    # Meta data
    # TODO raise exception if invalid genome build was used ("37" and "38" are valid)
    case_obj["genome_build"] = case_data.get("genome_build", "37")

    if case_data.get("rank_model_version"):
        case_obj["rank_model_version"] = str(case_data["rank_model_version"])

    if case_data.get("sv_rank_model_version"):
        case_obj["sv_rank_model_version"] = str(
            case_data["sv_rank_model_version"])

    # A missing, None or 0 threshold is left unset on purpose; the key is then
    # absent from the case object
    if case_data.get("rank_score_threshold"):
        case_obj["rank_score_threshold"] = float(
            case_data["rank_score_threshold"])

    # Cohort information
    if case_data.get("cohorts"):
        case_obj["cohorts"] = case_data["cohorts"]
        # Check if all case cohorts are registered under the institute
        institute_cohorts = set(institute_obj.get("cohorts", []))
        all_cohorts = institute_cohorts.union(case_obj["cohorts"])
        if len(all_cohorts) > len(institute_cohorts):
            # if not, update institute with new cohorts
            LOG.warning("Updating institute object with new cohort terms")
            adapter.institute_collection.find_one_and_update(
                {"_id": institute_obj["_id"]},
                {"$set": {
                    "cohorts": list(all_cohorts)
                }})

    # phenotype information
    if case_data.get("phenotype_terms"):
        phenotypes = []
        for phenotype in case_data["phenotype_terms"]:
            phenotype_obj = adapter.hpo_term(phenotype)
            if phenotype_obj is None:
                LOG.warning(
                    f"Could not find term with ID '{phenotype}' in HPO collection, skipping phenotype term."
                )
                continue

            phenotypes.append({
                "phenotype_id": phenotype,
                "feature": phenotype_obj.get("description")
            })
        if phenotypes:
            case_obj["phenotype_terms"] = phenotypes

    # phenotype groups
    if case_data.get("phenotype_groups"):
        phenotype_groups = []
        for phenotype in case_data["phenotype_groups"]:
            phenotype_obj = build_phenotype(phenotype, adapter)
            if phenotype_obj:
                phenotype_groups.append(phenotype_obj)
        if phenotype_groups:
            case_obj["phenotype_groups"] = phenotype_groups

    # Files
    case_obj["madeline_info"] = case_data.get("madeline_info")

    case_obj["custom_images"] = case_data.get("custom_images")
    # Reports are only copied when the key is present in case_data
    for custom_report in CUSTOM_CASE_REPORTS:
        if custom_report in case_data:
            case_obj[custom_report] = case_data.get(custom_report)

    vcf_files = case_data.get("vcf_files", {})
    case_obj["vcf_files"] = vcf_files
    case_obj["delivery_report"] = case_data.get("delivery_report")

    case_obj["has_svvariants"] = bool(
        vcf_files.get("vcf_sv") or vcf_files.get("vcf_sv_research"))

    case_obj["has_strvariants"] = bool(vcf_files.get("vcf_str"))

    case_obj["is_migrated"] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj["track"] = case_data.get("track", "rare")

    case_obj["group"] = case_data.get("group", [])

    return case_obj
Example #29
0
def load_variants(adapter, variant_file, case_obj, variant_type='clinical',
                  category='snv', rank_threshold=5, chrom=None, start=None,
                  end=None):
    """Load the variants of a VCF file into the database

    Without a region (chrom is falsy) only variants with a rank score above
    rank_threshold are parsed and loaded. With a region, every variant is
    parsed and those accepted by check_coordinates are loaded.

    If anything fails while loading without a region, delete_variants is
    called for the case and variant type before the error is re-raised.

        Args:
            adapter(MongoAdapter)
            variant_file(str): Path to variant file
            case_obj(dict): the case the variants belong to
            variant_type(str)
            category(str): 'snv' or 'sv' (currently not used by this function)
            rank_threshold(int)
            chrom(str)
            start(int)
            end(int)

        Raises:
            IntegrityError: if the owner institute of the case is not found
    """

    institute_obj = adapter.institute(institute_id=case_obj['owner'])

    if not institute_obj:
        raise IntegrityError("Institute {0} does not exist in"
                             " database.".format(case_obj['owner']))

    gene_to_panels = adapter.gene_to_panels()

    hgncid_to_gene = adapter.hgncid_to_gene()

    vcf_obj = VCF(variant_file)

    rank_results_header = parse_rank_results_header(vcf_obj)
    vep_header = parse_vep_header(vcf_obj)

    # This is a dictionary to tell where ind are in vcf
    individual_positions = {
        ind: i for i, ind in enumerate(vcf_obj.samples)
    }

    logger.info("Start inserting variants into database")
    start_insertion = datetime.now()
    start_five_thousand = datetime.now()
    # Stays 0 when the file holds no variants, so the summary below is correct
    nr_variants = 0
    nr_inserted = 0

    coordinates = False
    if chrom:
        coordinates = {
            'chrom': chrom,
            'start': start,
            'end': end
        }

    try:
        # Count from 1 so nr_variants is always the number of variants read
        for nr_variants, variant in enumerate(vcf_obj, 1):
            rank_score = parse_rank_score(
                variant.INFO.get('RankScore'),
                case_obj['display_name']
            )

            if coordinates or (rank_score > rank_threshold):
                parsed_variant = parse_variant(
                    variant=variant,
                    case=case_obj,
                    variant_type=variant_type,
                    rank_results_header=rank_results_header,
                    vep_header=vep_header,
                    individual_positions=individual_positions
                )
                # With a region, only variants inside it are loaded
                if not coordinates or check_coordinates(parsed_variant,
                                                        coordinates):
                    variant_obj = build_variant(
                        variant=parsed_variant,
                        institute_id=institute_obj['_id'],
                        gene_to_panels=gene_to_panels,
                        hgncid_to_gene=hgncid_to_gene,
                    )
                    try:
                        load_variant(adapter, variant_obj)
                    except IntegrityError as error:
                        # Best effort: a variant the database rejects is skipped
                        logger.debug("Variant was not inserted: %s", error)
                    else:
                        nr_inserted += 1
                        # Report right after the insert so each milestone is
                        # logged exactly once
                        if nr_inserted % 1000 == 0:
                            logger.info("%s variants inserted" % nr_inserted)

            # Progress is reported for every variant read, loaded or not
            if nr_variants % 5000 == 0:
                logger.info("%s variants parsed" % str(nr_variants))
                logger.info("Time to parse variants: {} ".format(
                            datetime.now() - start_five_thousand))
                start_five_thousand = datetime.now()

    except Exception:
        if not coordinates:
            logger.warning("Deleting inserted variants")
            delete_variants(adapter, case_obj, variant_type)
        raise

    logger.info("All variants inserted.")
    logger.info("Number of variants in file: {0}".format(nr_variants))
    logger.info("Number of variants inserted: {0}".format(nr_inserted))
    logger.info("Time to insert variants:{0}".format(datetime.now() - start_insertion))
Example #30
0
def build_case(case_data, adapter):
    """Build a case object that is to be inserted to the database

    The owner institute and the gene panels are looked up through the
    adapter; phenotype terms and groups are built with build_phenotype.

    Args:
        case_data (dict): A dictionary with the relevant case information
        adapter (scout.adapter.MongoAdapter)

    Returns:
        case_obj (dict): A case object

    Raises:
        KeyError: if case_data has no "case_id"
        ConfigError: if case_data has no "owner"
        IntegrityError: if the owner institute or one of the gene panels is
            not found in the database

    dict(
        _id = str, # required=True, unique, taken from case_data["case_id"]
        display_name = str, # If not display name use case_id
        owner = str, # required

        # These are the names of all the collaborators that are allowed to view the
        # case, including the owner
        collaborators = list, # List of institute_ids
        assignees = list, # holds the assignee (_id of a user), only set when given
        individuals = list, # list of dictionaries with individuals
        created_at = datetime,
        updated_at = datetime,
        suspects = list, # List of variants referred by there _id
        causatives = list, # List of variants referred by there _id

        synopsis = str, # The synopsis is a text blob
        status = str, # default='inactive', choices=STATUS
        is_research = bool, # default=False
        research_requested = bool, # default=False
        rerun_requested = bool, # default=False

        analysis_date = datetime,
        analyses = list, # list of dict

        # default_panels specifies which panels that should be shown when
        # the case is opened
        panels = list, # list of dictionaries with panel information

        dynamic_gene_list = list, # List of genes

        genome_build = str, # This should be 37 or 38
        genome_version = float, # What version of the build

        rank_model_version = str,
        rank_score_threshold = float, # only set when a truthy value is given

        phenotype_terms = list, # List of dictionaries with phenotype information
        phenotype_groups = list, # List of dictionaries with phenotype information

        madeline_info = str, # madeline info is a full xml file

        multiqc = str, # path to dir with multiqc information

        vcf_files = dict, # A dictionary with vcf files

        diagnosis_phenotypes = list, # List of references to diseases
        diagnosis_genes = list, # List of references to genes

        has_svvariants = bool, # default=False

        is_migrated = bool # default=False

    )
    """
    log.info("build case with id: {0}".format(case_data["case_id"]))
    case_obj = {
        "_id": case_data["case_id"],
        "display_name": case_data.get("display_name", case_data["case_id"]),
    }

    # Check if institute exists in database
    try:
        institute_id = case_data["owner"]
    except KeyError as err:
        # Chain the KeyError so the missing key stays visible in the traceback
        raise ConfigError("Case has to have a institute") from err
    institute_obj = adapter.institute(institute_id)
    if not institute_obj:
        raise IntegrityError("Institute %s not found in database" %
                             institute_id)
    case_obj["owner"] = institute_id

    # Owner always has to be part of collaborators
    collaborators = set(case_data.get("collaborators", []))
    collaborators.add(institute_id)
    case_obj["collaborators"] = list(collaborators)

    if case_data.get("assignee"):
        case_obj["assignees"] = [case_data["assignee"]]

    case_obj["smn_tsv"] = case_data.get("smn_tsv")

    # Individuals; any error from build_individual propagates to the caller
    ind_objs = [
        build_individual(individual)
        for individual in case_data.get("individuals", [])
    ]
    # sort the samples to put the affected individual first
    case_obj["individuals"] = sorted(ind_objs,
                                     key=lambda ind: -ind["phenotype"])

    now = datetime.now()
    case_obj["created_at"] = now
    case_obj["updated_at"] = now

    if case_data.get("suspects"):
        case_obj["suspects"] = case_data["suspects"]
    if case_data.get("causatives"):
        case_obj["causatives"] = case_data["causatives"]

    case_obj["synopsis"] = case_data.get("synopsis", "")

    case_obj["status"] = "inactive"
    case_obj["is_research"] = False
    case_obj["research_requested"] = False
    case_obj["rerun_requested"] = False

    analysis_date = case_data.get("analysis_date")
    if analysis_date:
        case_obj["analysis_date"] = analysis_date

    # We store some metadata and references about gene panels in 'panels'
    default_panels = case_data.get("default_panels", [])
    panels = []

    for panel_name in case_data.get("gene_panels", []):
        panel_obj = adapter.gene_panel(panel_name)
        if not panel_obj:
            raise IntegrityError("Panel %s does not exist in database" %
                                 panel_name)
        panels.append({
            "panel_id": panel_obj["_id"],
            "panel_name": panel_obj["panel_name"],
            "display_name": panel_obj["display_name"],
            "version": panel_obj["version"],
            "updated_at": panel_obj["date"],
            "nr_genes": len(panel_obj["genes"]),
            "is_default": panel_name in default_panels,
        })

    case_obj["panels"] = panels

    case_obj["dynamic_gene_list"] = []

    # Meta data
    # TODO raise exception if invalid genome build was used ("37" and "38" are valid)
    case_obj["genome_build"] = case_data.get("genome_build", "37")
    case_obj["genome_version"] = case_data.get("genome_version")

    if case_data.get("rank_model_version"):
        case_obj["rank_model_version"] = str(case_data["rank_model_version"])

    if case_data.get("sv_rank_model_version"):
        case_obj["sv_rank_model_version"] = str(
            case_data["sv_rank_model_version"])

    # A missing, None or 0 threshold is left unset on purpose; the key is then
    # absent from the case object
    if case_data.get("rank_score_threshold"):
        case_obj["rank_score_threshold"] = float(
            case_data["rank_score_threshold"])

    # phenotype information; terms for which build_phenotype returns nothing are dropped
    phenotypes = []
    for phenotype in case_data.get("phenotype_terms", []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotypes.append(phenotype_obj)
    if phenotypes:
        case_obj["phenotype_terms"] = phenotypes

    # phenotype groups
    phenotype_groups = []
    for phenotype in case_data.get("phenotype_groups", []):
        phenotype_obj = build_phenotype(phenotype, adapter)
        if phenotype_obj:
            phenotype_groups.append(phenotype_obj)
    if phenotype_groups:
        case_obj["phenotype_groups"] = phenotype_groups

    # Files
    case_obj["madeline_info"] = case_data.get("madeline_info")
    case_obj["chromograph_image_files"] = case_data.get(
        "chromograph_image_files")
    case_obj["chromograph_prefixes"] = case_data.get("chromograph_prefixes")

    # multiqc is only copied when the key is present in case_data
    if "multiqc" in case_data:
        case_obj["multiqc"] = case_data.get("multiqc")
    vcf_files = case_data.get("vcf_files", {})
    case_obj["vcf_files"] = vcf_files
    case_obj["delivery_report"] = case_data.get("delivery_report")

    case_obj["has_svvariants"] = bool(
        vcf_files.get("vcf_sv") or vcf_files.get("vcf_sv_research"))

    case_obj["has_strvariants"] = bool(vcf_files.get("vcf_str"))

    case_obj["is_migrated"] = False

    # What experiment is used, alternatives are rare (rare disease) or cancer
    case_obj["track"] = case_data.get("track", "rare")

    return case_obj