Exemple #1
0
 def _process_result(variant_json, saved_variant):
     if add_tags:
         variant_json.update({
             'tags': [
                 get_json_for_variant_tag(tag)
                 for tag in saved_variant.varianttag_set.all()
             ],
             'functionalData': [
                 get_json_for_variant_functional_data(tag)
                 for tag in saved_variant.variantfunctionaldata_set.all()
             ],
             'notes': [
                 get_json_for_variant_note(tag)
                 for tag in saved_variant.variantnote_set.all()
             ],
         })
     if add_details:
         saved_variant_json = json.loads(saved_variant.saved_variant_json
                                         or '{}')
         variant_json.update(
             variant_details(saved_variant_json, project
                             or saved_variant.project, user, **kwargs))
     variant_json.update({
         'variantId': saved_variant.guid,  # TODO get from json
         'familyGuids': [saved_variant.family.guid],
     })
     return variant_json
 def _process_result(variant_json, saved_variant):
     if add_tags:
         variant_json.update({
             'tags': [get_json_for_variant_tag(tag) for tag in saved_variant.varianttag_set.all()],
             'functionalData': [get_json_for_variant_functional_data(tag) for tag in
                                saved_variant.variantfunctionaldata_set.all()],
             'notes': [get_json_for_variant_note(tag) for tag in saved_variant.variantnote_set.all()],
         })
     if add_details:
         saved_variant_json = json.loads(saved_variant.saved_variant_json or '{}')
         variant_json.update(variant_details(saved_variant_json, project or saved_variant.project, user, **kwargs))
     variant_json.update({
         'variantId': saved_variant.guid,  # TODO get from json
         'familyGuids': [saved_variant.family.guid],
     })
     return variant_json
Exemple #3
0
def _generate_rows(project, loaded_samples_by_project_family,
                   saved_variants_by_project_family, errors):
    rows = []

    loaded_samples_by_family = loaded_samples_by_project_family[project.guid]
    saved_variants_by_family = saved_variants_by_project_family[project.guid]

    if not loaded_samples_by_family:
        errors.append("No data loaded for project: %s" % project)
        logger.info("No data loaded for project: %s" % project)
        return []

    if "external" in project.name or "reprocessed" in project.name:
        sequencing_approach = "REAN"
    else:
        sequencing_approach = loaded_samples_by_family.values(
        )[0][-1].sample_type

    now = timezone.now()
    for family in project.families:
        samples = loaded_samples_by_family.get(family.guid)
        if not samples:
            errors.append("No data loaded for family: %s. Skipping..." %
                          family)
            continue

        row = {
            "project_guid":
            project.guid,
            "family_guid":
            family.guid,
            "family_id":
            family.family_id,
            "collaborator":
            project.name,
            "sequencing_approach":
            sequencing_approach,
            "extras_pedigree_url":
            family.pedigree_image.url if family.pedigree_image else "",
            "coded_phenotype":
            family.coded_phenotype or "",
            "analysis_summary": (family.analysis_summary or '').strip('" \n'),
        }
        row.update(DEFAULT_ROW)

        t0 = samples[0].loaded_date
        t0_diff = rdelta.relativedelta(now, t0)
        t0_months_since_t0 = t0_diff.years * 12 + t0_diff.months
        row.update({
            "t0": t0,
            "t0_copy": t0,
            "months_since_t0": t0_months_since_t0,
        })
        if t0_months_since_t0 < 12:
            row['analysis_complete_status'] = "first_pass_in_progress"

        submitted_to_mme = SEQR_ID_TO_MME_ID_MAP.find_one({
            'project_id':
            project.deprecated_project_id,
            'family_id':
            family.family_id
        })
        if submitted_to_mme:
            row["submitted_to_mme"] = "Y"

        phenotips_individual_data_records = [
            json.loads(i.phenotips_data) for i in family.individual_set.all()
            if i.phenotips_data
        ]

        phenotips_individual_expected_inheritance_model = [
            inheritance_mode["label"]
            for phenotips_data in phenotips_individual_data_records
            for inheritance_mode in phenotips_data.get(
                "global_mode_of_inheritance", [])
        ]
        if len(phenotips_individual_expected_inheritance_model) == 1:
            row["expected_inheritance_model"] = phenotips_individual_expected_inheritance_model.pop(
            )

        phenotips_individual_mim_disorders = [
            phenotips_data.get("disorders", [])
            for phenotips_data in phenotips_individual_data_records
        ]
        omim_number_initial = next(
            (disorder["id"] for disorders in phenotips_individual_mim_disorders
             for disorder in disorders if "id" in disorder),
            '').replace("MIM:", "")
        if omim_number_initial:
            row.update({
                "omim_number_initial": omim_number_initial,
                "phenotype_class": "Known",
            })

        if family.post_discovery_omim_number:
            row["omim_number_post_discovery"] = family.post_discovery_omim_number

        phenotips_individual_features = [
            phenotips_data.get("features", [])
            for phenotips_data in phenotips_individual_data_records
        ]
        category_not_set_on_some_features = False
        for features_list in phenotips_individual_features:
            for feature in features_list:
                if "category" not in feature:
                    category_not_set_on_some_features = True
                    continue

                if feature["observed"].lower() == "yes":
                    hpo_category_id = feature["category"]
                    hpo_category_name = HPO_CATEGORY_NAMES[hpo_category_id]
                    key = hpo_category_name.lower().replace(" ", "_").replace(
                        "/", "_")

                    row[key] = "Y"
                elif feature["observed"].lower() == "no":
                    continue
                else:
                    raise ValueError("Unexpected value for 'observed' in %s" %
                                     (feature, ))

        if category_not_set_on_some_features:
            errors.append(
                "HPO category field not set for some HPO terms in %s" % family)

        saved_variants = saved_variants_by_family.get(family.guid)
        if not saved_variants:
            rows.append(row)
            continue

        saved_variants_to_json = {}
        for variant in saved_variants:
            if not variant.saved_variant_json:
                errors.append("%s - variant annotation not found" % variant)
                rows.append(row)
                continue

            saved_variant_json = variant_details(json.loads(
                variant.saved_variant_json),
                                                 project,
                                                 user=None)

            if not saved_variant_json['transcripts']:
                errors.append("%s - no gene ids" % variant)
                rows.append(row)
                continue

            saved_variants_to_json[variant] = saved_variant_json

        affected_sample_guids = set()
        unaffected_sample_guids = set()
        for sample in samples:
            if sample.individual.affected == "A":
                affected_sample_guids.add(sample.guid)
            elif sample.individual.affected == "N":
                unaffected_sample_guids.add(sample.guid)

        potential_compound_het_genes = defaultdict(set)
        for variant, saved_variant_json in saved_variants_to_json.items():
            inheritance_models = set()

            affected_indivs_with_hom_alt_variants = set()
            affected_indivs_with_het_variants = set()
            unaffected_indivs_with_hom_alt_variants = set()
            unaffected_indivs_with_het_variants = set()
            is_x_linked = False

            genotypes = saved_variant_json.get('genotypes')
            if genotypes:
                chrom = saved_variant_json['chrom']
                is_x_linked = "X" in chrom
                for sample_guid, genotype in genotypes.items():
                    if genotype[
                            "numAlt"] == 2 and sample_guid in affected_sample_guids:
                        affected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype[
                            "numAlt"] == 1 and sample_guid in affected_sample_guids:
                        affected_indivs_with_het_variants.add(sample_guid)
                    elif genotype[
                            "numAlt"] == 2 and sample_guid in unaffected_sample_guids:
                        unaffected_indivs_with_hom_alt_variants.add(
                            sample_guid)
                    elif genotype[
                            "numAlt"] == 1 and sample_guid in unaffected_sample_guids:
                        unaffected_indivs_with_het_variants.add(sample_guid)

            # AR-homozygote, AR-comphet, AR, AD, de novo, X-linked, UPD, other, multiple
            if not unaffected_indivs_with_hom_alt_variants and affected_indivs_with_hom_alt_variants:
                if is_x_linked:
                    inheritance_models.add("X-linked")
                else:
                    inheritance_models.add("AR-homozygote")

            if not unaffected_indivs_with_hom_alt_variants and not unaffected_indivs_with_het_variants and affected_indivs_with_het_variants:
                if unaffected_sample_guids:
                    inheritance_models.add("de novo")
                else:
                    inheritance_models.add("AD")

            if not unaffected_indivs_with_hom_alt_variants and (
                    len(unaffected_sample_guids) < 2
                    or unaffected_indivs_with_het_variants
            ) and affected_indivs_with_het_variants and not affected_indivs_with_hom_alt_variants:
                for gene_id in saved_variant_json['transcripts']:
                    potential_compound_het_genes[gene_id].add(variant)

            saved_variant_json['inheritance'] = inheritance_models

        gene_ids_to_saved_variants = defaultdict(set)
        gene_ids_to_variant_tag_names = defaultdict(set)
        gene_ids_to_inheritance = defaultdict(set)
        # Compound het variants are reported in the gene that they share
        for gene_id, variants in potential_compound_het_genes.items():
            if len(variants) > 1:
                gene_ids_to_inheritance[gene_id].add("AR-comphet")
                # Only include compound hets for one of the genes they are both in
                existing_gene_id = next(
                    (existing_gene_id for existing_gene_id, existing_variants
                     in gene_ids_to_saved_variants.items()
                     if existing_variants == variants), None)
                if existing_gene_id:
                    main_gene_ids = {
                        saved_variants_to_json[variant]['mainTranscript']
                        ['geneId']
                        for variant in variants
                    }
                    if gene_id in main_gene_ids:
                        gene_ids_to_saved_variants[
                            gene_id] = gene_ids_to_saved_variants[
                                existing_gene_id]
                        del gene_ids_to_saved_variants[existing_gene_id]
                        gene_ids_to_variant_tag_names[
                            gene_id] = gene_ids_to_variant_tag_names[
                                existing_gene_id]
                        del gene_ids_to_variant_tag_names[existing_gene_id]
                else:
                    for variant in variants:
                        saved_variants_to_json[variant]['inheritance'] = {
                            "AR-comphet"
                        }
                        gene_ids_to_variant_tag_names[gene_id].update({
                            vt.variant_tag_type.name
                            for vt in variant.discovery_tags
                        })
                    gene_ids_to_saved_variants[gene_id].update(variants)

        # Non-compound het variants are reported in the main transcript gene
        for variant, saved_variant_json in saved_variants_to_json.items():
            if "AR-comphet" not in saved_variant_json['inheritance']:
                gene_id = saved_variant_json['mainTranscript']['geneId']
                gene_ids_to_saved_variants[gene_id].add(variant)
                gene_ids_to_variant_tag_names[gene_id].update({
                    vt.variant_tag_type.name
                    for vt in variant.discovery_tags
                })
                gene_ids_to_inheritance[gene_id].update(
                    saved_variant_json['inheritance'])

        if len(gene_ids_to_saved_variants) > 1:
            row["gene_count"] = len(gene_ids_to_saved_variants)

        for gene_id, variants in gene_ids_to_saved_variants.items():
            # create a copy of the row dict
            row = dict(row)

            row["actual_inheritance_model"] = ", ".join(
                gene_ids_to_inheritance[gene_id])

            row["gene_id"] = gene_id

            variant_tag_names = gene_ids_to_variant_tag_names[gene_id]

            has_tier1 = any(
                name.startswith("Tier 1") for name in variant_tag_names)
            has_tier2 = any(
                name.startswith("Tier 2") for name in variant_tag_names)
            has_known_gene_for_phenotype = 'Known gene for phenotype' in variant_tag_names

            row.update({
                "solved": ("TIER 1 GENE" if
                           (has_tier1 or has_known_gene_for_phenotype) else
                           ("TIER 2 GENE" if has_tier2 else "N")),
                "komp_early_release":
                "Y" if 'Share with KOMP' in variant_tag_names else "N",
            })

            if has_tier1 or has_tier2 or has_known_gene_for_phenotype:
                row.update({
                    "posted_publicly":
                    "",
                    "analysis_complete_status":
                    "complete",
                    "novel_mendelian_gene":
                    "Y" if any("Novel gene" in name
                               for name in variant_tag_names) else "N",
                })

            if any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype expansion',
                    'Tier 1 - Novel mode of inheritance',
                    'Tier 2 - Phenotype expansion',
            ]):
                row["phenotype_class"] = "EXPAN"
            elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype not delineated',
                    'Tier 2 - Phenotype not delineated'
            ]):
                row["phenotype_class"] = "UE"

            if not submitted_to_mme:
                if has_tier1 or has_tier2:
                    row["submitted_to_mme"] = "N" if t0_months_since_t0 > 7 else "TBD"
                elif has_known_gene_for_phenotype:
                    row["submitted_to_mme"] = "KPG"

            if has_tier1 or has_tier2:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    if functional_field == ADDITIONAL_KINDREDS_FIELD:
                        row[functional_field] = "1"
                    elif functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        row[functional_field] = "NA"
                    else:
                        row[functional_field] = "N"
            elif has_known_gene_for_phenotype:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    row[functional_field] = "KPG"

            variant_tag_list = []
            for variant in variants:
                variant_id = "-".join(
                    map(
                        str,
                        list(get_chrom_pos(variant.xpos_start)) +
                        [variant.ref, variant.alt]))
                variant_tag_list += [(variant_id, gene_id,
                                      vt.variant_tag_type.name.lower())
                                     for vt in variant.discovery_tags]

                for f in variant.variantfunctionaldata_set.all():
                    functional_field = FUNCTIONAL_DATA_FIELD_MAP[
                        f.functional_data_tag]
                    if functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        value = f.metadata
                        if functional_field == ADDITIONAL_KINDREDS_FIELD:
                            value = str(int(value) + 1)
                        elif row[functional_field] != 'NS':
                            value = '{} {}'.format(row[functional_field],
                                                   value)
                    else:
                        value = 'Y'

                    row[functional_field] = value

            row["extras_variant_tag_list"] = variant_tag_list

            rows.append(row)

    _update_gene_symbols(rows)
    _update_initial_omim_numbers(rows)

    return rows
Exemple #4
0
def _generate_rows(project, loaded_samples_by_project_family, saved_variants_by_project_family, errors):
    rows = []

    loaded_samples_by_family = loaded_samples_by_project_family[project.guid]
    saved_variants_by_family = saved_variants_by_project_family[project.guid]

    if not loaded_samples_by_family:
        errors.append("No data loaded for project: %s" % project)
        logger.info("No data loaded for project: %s" % project)
        return []

    if "external" in project.name or "reprocessed" in project.name:
        sequencing_approach = "REAN"
    else:
        sequencing_approach = loaded_samples_by_family.values()[0][-1].sample_type

    now = timezone.now()
    for family in project.families:
        samples = loaded_samples_by_family.get(family.guid)
        if not samples:
            errors.append("No data loaded for family: %s. Skipping..." % family)
            continue

        row = {
            "project_guid": project.guid,
            "family_guid": family.guid,
            "family_id": family.family_id,
            "collaborator": project.name,
            "sequencing_approach": sequencing_approach,
            "extras_pedigree_url": family.pedigree_image.url if family.pedigree_image else "",
            "coded_phenotype": family.coded_phenotype or "",
            "pubmed_ids": '; '.join(family.pubmed_ids),
            "analysis_summary": (family.analysis_summary or '').strip('" \n'),
            "row_id": family.guid,
            "num_individuals_sequenced": len({sample.individual for sample in samples})
        }
        row.update(DEFAULT_ROW)

        t0 = samples[0].loaded_date
        t0_diff = rdelta.relativedelta(now, t0)
        t0_months_since_t0 = t0_diff.years * 12 + t0_diff.months
        row.update({
            "t0": t0,
            "t0_copy": t0,
            "months_since_t0": t0_months_since_t0,
        })
        if t0_months_since_t0 < 12:
            row['analysis_complete_status'] = "first_pass_in_progress"

        submitted_to_mme = any(i.mme_submitted_date for i in family.individual_set.all())
        if submitted_to_mme:
            row["submitted_to_mme"] = "Y"

        phenotips_individual_data_records = [json.loads(i.phenotips_data) for i in family.individual_set.all() if i.phenotips_data]

        phenotips_individual_expected_inheritance_model = [
            inheritance_mode["label"] for phenotips_data in phenotips_individual_data_records for inheritance_mode in phenotips_data.get("global_mode_of_inheritance", [])
        ]
        if len(phenotips_individual_expected_inheritance_model) == 1:
            row["expected_inheritance_model"] = phenotips_individual_expected_inheritance_model.pop()

        phenotips_individual_mim_disorders = [phenotips_data.get("disorders", []) for phenotips_data in phenotips_individual_data_records]
        omim_number_initial = next((disorder["id"] for disorders in phenotips_individual_mim_disorders for disorder in disorders if "id" in disorder), '').replace("MIM:", "")
        if omim_number_initial:
            row.update({
                "omim_number_initial": omim_number_initial,
                "phenotype_class": "KNOWN",
            })

        if family.post_discovery_omim_number:
            row["omim_number_post_discovery"] = family.post_discovery_omim_number

        phenotips_individual_features = [phenotips_data.get("features", []) for phenotips_data in phenotips_individual_data_records]
        category_not_set_on_some_features = False
        for features_list in phenotips_individual_features:
            for feature in features_list:
                if "category" not in feature:
                    category_not_set_on_some_features = True
                    continue

                if feature["observed"].lower() == "yes":
                    hpo_category_id = feature["category"]
                    hpo_category_name = HPO_CATEGORY_NAMES[hpo_category_id]
                    key = hpo_category_name.lower().replace(" ", "_").replace("/", "_")

                    row[key] = "Y"
                elif feature["observed"].lower() == "no":
                    continue
                else:
                    raise ValueError("Unexpected value for 'observed' in %s" % (feature,))

        if category_not_set_on_some_features:
            errors.append("HPO category field not set for some HPO terms in %s" % family)

        saved_variants = saved_variants_by_family.get(family.guid)
        if not saved_variants:
            rows.append(row)
            continue

        saved_variants_to_json = {}
        for variant in saved_variants:
            if not variant.saved_variant_json:
                errors.append("%s - variant annotation not found" % variant)
                rows.append(row)
                continue

            saved_variant_json = variant_details(json.loads(variant.saved_variant_json), project, user=None)

            if not saved_variant_json['transcripts']:
                errors.append("%s - no gene ids" % variant)
                rows.append(row)
                continue

            saved_variants_to_json[variant] = saved_variant_json

        affected_individual_guids = set()
        unaffected_individual_guids = set()
        for sample in samples:
            if sample.individual.affected == "A":
                affected_individual_guids.add(sample.individual.guid)
            elif sample.individual.affected == "N":
                unaffected_individual_guids.add(sample.individual.guid)

        potential_compound_het_genes = defaultdict(set)
        for variant, saved_variant_json in saved_variants_to_json.items():
            inheritance_models = set()

            affected_indivs_with_hom_alt_variants = set()
            affected_indivs_with_het_variants = set()
            unaffected_indivs_with_hom_alt_variants = set()
            unaffected_indivs_with_het_variants = set()
            is_x_linked = False

            genotypes = saved_variant_json.get('genotypes')
            if genotypes:
                chrom = saved_variant_json['chrom']
                is_x_linked = "X" in chrom
                for sample_guid, genotype in genotypes.items():
                    if genotype["numAlt"] == 2 and sample_guid in affected_individual_guids:
                        affected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in affected_individual_guids:
                        affected_indivs_with_het_variants.add(sample_guid)
                    elif genotype["numAlt"] == 2 and sample_guid in unaffected_individual_guids:
                        unaffected_indivs_with_hom_alt_variants.add(sample_guid)
                    elif genotype["numAlt"] == 1 and sample_guid in unaffected_individual_guids:
                        unaffected_indivs_with_het_variants.add(sample_guid)

            # AR-homozygote, AR-comphet, AR, AD, de novo, X-linked, UPD, other, multiple
            if not unaffected_indivs_with_hom_alt_variants and affected_indivs_with_hom_alt_variants:
                if is_x_linked:
                    inheritance_models.add("X-linked")
                else:
                    inheritance_models.add("AR-homozygote")

            if not unaffected_indivs_with_hom_alt_variants and not unaffected_indivs_with_het_variants and affected_indivs_with_het_variants:
                if unaffected_individual_guids:
                    inheritance_models.add("de novo")
                else:
                    inheritance_models.add("AD")

            if not unaffected_indivs_with_hom_alt_variants and (len(
                    unaffected_individual_guids) < 2 or unaffected_indivs_with_het_variants) and affected_indivs_with_het_variants and not affected_indivs_with_hom_alt_variants:
                for gene_id in saved_variant_json['transcripts']:
                    potential_compound_het_genes[gene_id].add(variant)

            saved_variant_json['inheritance'] = inheritance_models

        gene_ids_to_saved_variants = defaultdict(set)
        gene_ids_to_variant_tag_names = defaultdict(set)
        gene_ids_to_inheritance = defaultdict(set)
        # Compound het variants are reported in the gene that they share
        for gene_id, variants in potential_compound_het_genes.items():
            if len(variants) > 1:
                gene_ids_to_inheritance[gene_id].add("AR-comphet")
                # Only include compound hets for one of the genes they are both in
                existing_gene_id = next((
                    existing_gene_id for existing_gene_id, existing_variants in gene_ids_to_saved_variants.items()
                    if existing_variants == variants), None)
                if existing_gene_id:
                    main_gene_ids = {
                        saved_variants_to_json[variant]['mainTranscript']['geneId'] for variant in variants
                    }
                    if gene_id in main_gene_ids:
                        gene_ids_to_saved_variants[gene_id] = gene_ids_to_saved_variants[existing_gene_id]
                        del gene_ids_to_saved_variants[existing_gene_id]
                        gene_ids_to_variant_tag_names[gene_id] = gene_ids_to_variant_tag_names[existing_gene_id]
                        del gene_ids_to_variant_tag_names[existing_gene_id]
                else:
                    for variant in variants:
                        saved_variants_to_json[variant]['inheritance'] = {"AR-comphet"}
                        gene_ids_to_variant_tag_names[gene_id].update(
                            {vt.variant_tag_type.name for vt in variant.discovery_tags})
                    gene_ids_to_saved_variants[gene_id].update(variants)

        # Non-compound het variants are reported in the main transcript gene
        for variant, saved_variant_json in saved_variants_to_json.items():
            if "AR-comphet" not in saved_variant_json['inheritance']:
                gene_id = saved_variant_json['mainTranscript']['geneId']
                gene_ids_to_saved_variants[gene_id].add(variant)
                gene_ids_to_variant_tag_names[gene_id].update({vt.variant_tag_type.name for vt in variant.discovery_tags})
                gene_ids_to_inheritance[gene_id].update(saved_variant_json['inheritance'])

        if len(gene_ids_to_saved_variants) > 1:
            row["gene_count"] = len(gene_ids_to_saved_variants)

        for gene_id, variants in gene_ids_to_saved_variants.items():
            # create a copy of the row dict
            row = dict(row)

            row["actual_inheritance_model"] = ", ".join(gene_ids_to_inheritance[gene_id])

            row["gene_id"] = gene_id
            row["row_id"] += gene_id

            variant_tag_names = gene_ids_to_variant_tag_names[gene_id]

            has_tier1 = any(name.startswith("Tier 1") for name in variant_tag_names)
            has_tier2 = any(name.startswith("Tier 2") for name in variant_tag_names)
            has_known_gene_for_phenotype = 'Known gene for phenotype' in variant_tag_names

            row.update({
                "solved": ("TIER 1 GENE" if (has_tier1 or has_known_gene_for_phenotype) else (
                    "TIER 2 GENE" if has_tier2 else "N")),
                "komp_early_release": "Y" if 'Share with KOMP' in variant_tag_names else "N",
            })

            if has_tier1 or has_tier2 or has_known_gene_for_phenotype:
                row.update({
                    "posted_publicly":  "",
                    "analysis_complete_status": "complete",
                    "novel_mendelian_gene":  "Y" if any("Novel gene" in name for name in variant_tag_names) else "N",
                })

                if has_known_gene_for_phenotype:
                    row["phenotype_class"] = "KNOWN"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Known gene, new phenotype', 'Tier 2 - Known gene, new phenotype',
                ]):
                    row["phenotype_class"] = "NEW"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype expansion', 'Tier 1 - Novel mode of inheritance',  'Tier 2 - Phenotype expansion',
                ]):
                    row["phenotype_class"] = "EXPAN"
                elif any(tag in variant_tag_names for tag in [
                    'Tier 1 - Phenotype not delineated', 'Tier 2 - Phenotype not delineated'
                ]):
                    row["phenotype_class"] = "UE"

            if not submitted_to_mme:
                if has_tier1 or has_tier2:
                    row["submitted_to_mme"] = "N" if t0_months_since_t0 > 7 else "TBD"
                elif has_known_gene_for_phenotype:
                    row["submitted_to_mme"] = "KPG"

            if has_tier1 or has_tier2:
                # Set defaults
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    if functional_field == ADDITIONAL_KINDREDS_FIELD:
                        row[functional_field] = "1"
                    elif functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                        row[functional_field] = "NA"
                    else:
                        row[functional_field] = "N"
                # Set values
                for variant in variants:
                    for f in variant.variantfunctionaldata_set.all():
                        functional_field = FUNCTIONAL_DATA_FIELD_MAP[f.functional_data_tag]
                        if functional_field in METADATA_FUNCTIONAL_DATA_FIELDS:
                            value = f.metadata
                            if functional_field == ADDITIONAL_KINDREDS_FIELD:
                                value = str(int(value) + 1)
                            elif functional_field == OVERLAPPING_KINDREDS_FIELD:
                                existing_val = row[functional_field]
                                if existing_val != 'NA':
                                    value = str(max(int(existing_val), int(value)))
                            elif row[functional_field] != 'NS':
                                value = '{} {}'.format(row[functional_field], value)
                        else:
                            value = 'Y'

                        row[functional_field] = value
            elif has_known_gene_for_phenotype:
                for functional_field in FUNCTIONAL_DATA_FIELD_MAP.values():
                    row[functional_field] = "KPG"

            row["extras_variant_tag_list"] = []
            for variant in variants:
                variant_id = "-".join(map(str, list(get_chrom_pos(variant.xpos_start)) + [variant.ref, variant.alt]))
                row["extras_variant_tag_list"] += [
                    (variant_id, gene_id, vt.variant_tag_type.name.lower()) for vt in variant.discovery_tags
                ]

            rows.append(row)

    _update_gene_symbols(rows)
    _update_initial_omim_numbers(rows)

    return rows