Python get_xposの例、xbrowse.genomeloc.get_xpos Pythonの例

コード例 #1

0

ファイルを表示

ファイル: reference.py プロジェクト: xirasasa/seqr

 def get_gene_bounds(self, gene_id):
     """Return the xpos bounds of a gene as a (start_xpos, end_xpos) tuple.

     The gene record is looked up through self.gene_utils.get_genes. The
     GRCh37 coordinates are used when the record has a truthy 'chromGrch37'
     value; otherwise the GRCh38 coordinates are used.

     Returns (None, None, None) when no record is found for gene_id.
     NOTE(review): the empty result has three elements while the normal
     result has two - confirm which arity callers expect.
     """
     gene = self.gene_utils.get_genes([gene_id]).get(gene_id)
     if not gene:
         return (None, None, None)

     if gene['chromGrch37']:
         suffix = 'Grch37'
     else:
         suffix = 'Grch38'

     # Read all three coordinates first, then convert both ends to xpos.
     chrom = gene['chrom' + suffix]
     start = gene['start' + suffix]
     end = gene['end' + suffix]
     xstart = genomeloc.get_xpos(chrom, start)
     xend = genomeloc.get_xpos(chrom, end)
     return (xstart, xend)

コード例 #2

0

ファイルを表示

ファイル: elasticsearch_datastore.py プロジェクト: tianyunwang/seqr

    def _make_db_query(self, genotype_filter=None, variant_filter=None):
        """
        Build a coarse database query from the filters given to get_variants.

        Caller specifies filters to get_variants, but they are evaluated later.
        Here, we just inspect those filters and see what heuristics we can apply to avoid a full table scan.
        Query here must return a superset of the true get_variants results.
        Note that the full annotation isn't stored, so use the fields added by _add_index_fields_to_variant.

        Args:
            genotype_filter: optional genotype filter. When it is not None it is passed, together
                with the query dict, to _add_genotype_filter_to_variant_query.
            variant_filter: optional filter object exposing `locations`, `so_annotations`, `genes`
                and `ref_freqs`, and optionally `exclude_genes`.

        Returns:
            dict: the query document (empty when neither filter contributes anything).

        Side effects:
            Location strings of the form "chrom:start-end" in variant_filter.locations are replaced
            in place by their (xstart, xend) tuples.
        """
        try:
            string_types = basestring  # Python 2: matches both str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring

        db_query = {}

        # genotype filter
        if genotype_filter is not None:
            _add_genotype_filter_to_variant_query(db_query, genotype_filter)

        if not variant_filter:
            return db_query

        if variant_filter.locations:
            location_ranges = []
            for i, location in enumerate(variant_filter.locations):
                if isinstance(location, string_types):
                    chrom, pos_range = location.split(":")
                    start, end = pos_range.split("-")
                    xstart = genomeloc.get_xpos(chrom, int(start))
                    xend = genomeloc.get_xpos(chrom, int(end))
                    # store the parsed range back on the filter
                    variant_filter.locations[i] = (xstart, xend)
                else:
                    xstart, xend = location

                location_ranges.append({
                    '$and': [
                        {'xpos': {'$gte': xstart}},
                        {'xpos': {'$lte': xend}},
                    ]
                })

            db_query['$or'] = location_ranges

        if variant_filter.so_annotations:
            db_query['db_tags'] = {'$in': variant_filter.so_annotations}

        if variant_filter.genes:
            # exclude_genes is optional on the filter object: a missing attribute means
            # "include these genes" instead of raising AttributeError.
            if getattr(variant_filter, 'exclude_genes', False):
                db_query['db_gene_ids'] = {'$nin': variant_filter.genes}
            else:
                db_query['db_gene_ids'] = {'$in': variant_filter.genes}

        if variant_filter.ref_freqs:
            for population, freq in variant_filter.ref_freqs:
                db_query['db_freqs.' + population] = {'$lte': freq}

        return db_query

コード例 #3

0

ファイルを表示

ファイル: elasticsearch_datastore.py プロジェクト: macarthur-lab/seqr

    def _make_db_query(self, genotype_filter=None, variant_filter=None):
        """
        Build a coarse database query from the filters given to get_variants.

        Caller specifies filters to get_variants, but they are evaluated later.
        Here, we just inspect those filters and see what heuristics we can apply to avoid a full table scan.
        Query here must return a superset of the true get_variants results.
        Note that the full annotation isn't stored, so use the fields added by _add_index_fields_to_variant.

        Args:
            genotype_filter: optional genotype filter. When it is not None it is passed, together
                with the query dict, to _add_genotype_filter_to_variant_query.
            variant_filter: optional filter object exposing `toJSON()`, `locations`,
                `so_annotations`, `genes`, `ref_freqs`, `ref_acs` and `ref_hom_hemi`, and
                optionally `exclude_genes`.

        Returns:
            dict: the query document (empty when neither filter contributes anything).

        Side effects:
            Logs the JSON form of variant_filter, and replaces "chrom:start-end" strings in
            variant_filter.locations in place by their (xstart, xend) tuples.
        """
        try:
            string_types = basestring  # Python 2: matches both str and unicode
        except NameError:
            string_types = str  # Python 3 has no basestring

        db_query = {}

        # genotype filter
        if genotype_filter is not None:
            _add_genotype_filter_to_variant_query(db_query, genotype_filter)

        if not variant_filter:
            return db_query

        logger.info(pformat(variant_filter.toJSON()))

        if variant_filter.locations:
            location_ranges = []
            for i, location in enumerate(variant_filter.locations):
                if isinstance(location, string_types):
                    chrom, pos_range = location.split(":")
                    start, end = pos_range.split("-")
                    xstart = genomeloc.get_xpos(chrom, int(start))
                    xend = genomeloc.get_xpos(chrom, int(end))
                    # store the parsed range back on the filter
                    variant_filter.locations[i] = (xstart, xend)
                else:
                    xstart, xend = location

                location_ranges.append({'$and': [{'xpos': {'$gte': xstart}}, {'xpos': {'$lte': xend}}]})

            db_query['$or'] = location_ranges

        if variant_filter.so_annotations:
            db_query['db_tags'] = {'$in': variant_filter.so_annotations}

        if variant_filter.genes:
            # exclude_genes is optional on the filter object: a missing attribute means
            # "include these genes" instead of raising AttributeError.
            if getattr(variant_filter, 'exclude_genes', False):
                db_query['db_gene_ids'] = {'$nin': variant_filter.genes}
            else:
                db_query['db_gene_ids'] = {'$in': variant_filter.genes}

        if variant_filter.ref_freqs:
            for population, freq in variant_filter.ref_freqs:
                db_query['db_freqs.' + population] = {'$lte': freq}

        if variant_filter.ref_acs:
            for population, ac in variant_filter.ref_acs:
                db_query['db_acs.' + population] = {'$lte': ac}

        if variant_filter.ref_hom_hemi:
            # the same threshold is applied to both the hemizygous and homozygous counts
            for population, count in variant_filter.ref_hom_hemi:
                db_query['db_hemi.' + population] = {'$lte': count}
                db_query['db_hom.' + population] = {'$lte': count}

        return db_query

コード例 #4

0

ファイルを表示

ファイル: load_CADD.py プロジェクト: zmcv/seqr

def load_from_cadd_file(cadd_file):
    """Utility function to load scores from a CADD file.

    Reads the gzipped, tab-separated file at `cadd_file`, skips its two
    header lines, and for every remaining row sets `annotation.cadd_phred`
    on the matching record (same xpos/ref/alt) in annotator_store.variants,
    but only where that field does not exist yet. No records are inserted.

    Args:
        cadd_file: path of the gzipped CADD file.

    Raises:
        StopIteration: if the file has fewer than two lines.
        ValueError: if a row does not have exactly six tab-separated columns
            or its position is not an integer.

    NOTE(review): lines are handled as text (`rstrip('\\n')`), which matches
    Python 2's gzip.open default; under Python 3 the default mode yields
    bytes - confirm the target interpreter before porting.
    """
    # The context manager guarantees the handle is closed even if a row fails.
    with gzip.open(cadd_file) as f:
        # skip the two header lines
        next(f)
        next(f)

        for line in tqdm.tqdm(f):
            # Chrom  Pos     Ref     Alt     RawScore        PHRED
            chrom, pos, ref, alt, _raw, phred = line.rstrip('\n').split('\t')

            xpos = genomeloc.get_xpos(chrom, int(pos))

            annotator_store.variants.update(
                {
                    'xpos': xpos,
                    'ref': ref,
                    'alt': alt,
                    # never overwrite a score that is already stored
                    'annotation.cadd_phred': {
                        '$exists': False
                    }
                }, {'$set': {
                    'annotation.cadd_phred': phred
                }},
                upsert=False)

コード例 #5

0

ファイルを表示

ファイル: import_variant_tags_custom1.py プロジェクト: macarthur-lab/seqr

def add_variant_tag(row, user):
    """Apply the tag named in a spreadsheet row to the variant that row describes.

    `row` is a dict-like record. The project id is the fifth '/'-separated
    piece of the 'Linking ID' (or 'Link -> project ID') value; the family id,
    tag name, gene symbol and variant coordinate are read from their columns.

    Parsing failures, a missing project tag and family lookup errors are
    printed and make the function return early without changing anything.
    Otherwise:
      * existing tier 1 / tier 2 / "known gene for phenotype" tags on the
        variant that differ from the new tag are deleted;
      * when the new tag is already present more than once on the variant,
        the copies owned by `user` are deleted;
      * the tag is fetched or created, and a newly created tag is assigned
        to `user`.
    """
    linking_id = row.get('Linking ID') or row.get('Link -> project ID')
    project_id = linking_id.split('/')[4]
    raw_family_id = row.get('Family ID') or row.get('CMG Internal Project ID(s)')
    family_id = raw_family_id.strip()
    new_tag_name = row['New Tag'].strip()
    # Not used further down, but a row without any gene column still fails here.
    gene_symbol = (row.get('Gene symbol') or row.get('Gene Name')).strip()

    try:
        hgvsc = (row.get('HGVS') or row.get('g. coordinate'))
        hgvsc = hgvsc.strip().split(",")[0]
        chrom, change = hgvsc.split(":")
        chrom = chrom.replace("chr", "")
        pos_ref, alt = change.split(">")
        pos = re.search("[0-9]+", pos_ref).group(0).strip()
        ref = re.search("[ACGT]+", pos_ref).group(0).strip()
        xpos = genomeloc.get_xpos(chrom, int(pos))
    except Exception as err:
        print("Couldn't parse HGVS: %s in row %s. %s" % (hgvsc, row, err))
        return

    try:
        project_tag = ProjectTag.objects.get(project__project_id=project_id, tag__icontains=new_tag_name)
    except ObjectDoesNotExist as err:
        print("project tag not found - %s %s: %s" % (project_id, new_tag_name, err))
        return

    try:
        families = get_family(family_id, project_id=project_id)
    except Exception as err:
        print("Unable to get family: %s %s" % (family_id, err))
        return

    assert len(families) == 1
    family = families[0]

    # Drop conflicting tier-style tags that are already on this variant.
    tier_keywords = ["tier 1", "tier 2", "known gene for phenotype"]
    for existing_tag in VariantTag.objects.filter(family=family, xpos=xpos, ref=ref, alt=alt):
        lowered = existing_tag.project_tag.tag.lower()
        if not any(keyword in lowered for keyword in tier_keywords):
            continue
        if existing_tag.project_tag.tag == project_tag.tag:
            print("Variant %s already exists in %s %s" % (existing_tag, project_id, family_id))
        else:
            print("Variant %s tag will be replaced with %s" % (existing_tag, project_tag.tag))
            existing_tag.delete()

    # When the same tag is present several times, remove this user's copies.
    variant_tags_by_multiple_users = VariantTag.objects.filter(project_tag=project_tag, family=family, xpos=xpos, ref=ref, alt=alt)
    if len(variant_tags_by_multiple_users) > 1:
        for duplicate in variant_tags_by_multiple_users:
            if duplicate.user == user:
                print("Deleting extra tag: " + str(duplicate))
                duplicate.delete()

    vt, created = VariantTag.objects.get_or_create(project_tag=project_tag, family=family, xpos=xpos, ref=ref, alt=alt)
    if not created:
        return
    vt.user = user
    vt.save()
    print("Creating tag: %s" % (vt.toJSON(),))

コード例 #6

0

ファイルを表示

def get_exac_af(chrom, pos, ref, alt):
    """Look up ExAC allele frequencies for one variant.

    Scans the exac_vcf records within len(ref) + len(alt) bases of `pos`,
    reduces every alternate allele with get_minimal_representation, and keeps
    the last record/allele whose position, ref and alt equal the query.
    An error line is printed when more than one allele matches.

    Returns:
        (global_af, pop_max_af, pop_max_population), or (None, None, None)
        when no ExAC allele matches. pop_max_af stays -1 and
        pop_max_population stays None when no population has AN > 0.
    """
    populations = ['AMR', 'EAS', 'FIN', 'NFE', 'SAS', 'AFR']

    chrom_without_chr = chrom.replace("chr", "")
    xpos = genomeloc.get_single_location(chrom, pos)
    variant_length = len(ref) + len(alt)
    window_start = pos - variant_length
    window_end = pos + variant_length

    # check whether the alleles match
    matched_record = None
    matched_alt_index = None
    for record in exac_vcf.fetch(chrom_without_chr, window_start, window_end):
        record_xpos = genomeloc.get_xpos(record.CHROM, record.POS)
        for alt_index, record_alt in enumerate(record.ALT):
            minimal = get_minimal_representation(
                record_xpos, str(record.REF), str(record_alt))
            minimal_xpos, minimal_ref, minimal_alt = minimal
            if not (minimal_xpos == xpos and minimal_ref == ref and minimal_alt == alt):
                continue
            if matched_record is not None:
                print(
                    "ERROR: multiple exac variants match the variant: %s %s %s %s"
                    % (chrom, pos, ref, alt))
            matched_record = record
            matched_alt_index = alt_index

    if matched_record is None:
        return None, None, None

    # highest per-population allele frequency among populations with any calls
    pop_max_af = -1
    pop_max_population = None
    for population in populations:
        allele_number = matched_record.INFO['AN_' + population]
        if allele_number > 0:
            allele_count = matched_record.INFO['AC_' + population][matched_alt_index]
            pop_af = allele_count / float(allele_number)
            if pop_af > pop_max_af:
                pop_max_af = pop_af
                pop_max_population = population

    adjusted_number = matched_record.INFO['AN_Adj']
    adjusted_count = float(matched_record.INFO['AC_Adj'][matched_alt_index])
    if adjusted_number != 0:
        global_af = adjusted_count / float(adjusted_number)
    else:
        # no adjusted calls at all, so there cannot be any adjusted alt calls
        assert adjusted_count == 0
        global_af = 0

    return global_af, pop_max_af, pop_max_population

コード例 #7

0

ファイルを表示

ファイル: xbrowse_controls.py プロジェクト: PodioSpaz/seqr

def add_breakpoint_from_dict(project, bp):
    """
    Store the breakpoint described by the dict `bp` in the given project.

    The sample named in `bp` must already exist as an individual of the project.

    A breakpoint that is already stored (same project, position and individual)
    is left as it is, even when the loaded values differ; delete it first to
    reload it. Loading further samples incrementally by re-running is safe.
    The gene rows listed in `bp` are created, or re-saved when they already
    exist for a stored breakpoint.
    """

    # Fields in dict are chr     start   end     sample  depth   cscore  partner genes   cdsdist
    xpos = genomeloc.get_xpos(bp['chr'], int(bp['start']))
    sample_id = slugify(bp['sample'], separator='_')

    try:
        record = Breakpoint.objects.get(project=project,
                                        xpos=xpos,
                                        individual__indiv_id=sample_id)
    except Breakpoint.DoesNotExist:
        already_stored = False
        record = Breakpoint()

        record.xpos = xpos
        record.project = project
        record.obs = int(bp['depth'])
        record.individual = Individual.objects.get(project=project,
                                                   indiv_id=sample_id)
        record.sample_count = int(bp['sample_count'])
        record.partner = bp['partner']
        record.consensus = bp['cscore']
        record.save()
    else:
        already_stored = True

    gene_symbols = bp['genes'].split(',')
    cds_dists = bp['cdsdist'].split(',')
    for gene_symbol, cds_dist in zip(gene_symbols, cds_dists):
        if not gene_symbol:
            continue

        # Only a previously stored breakpoint can already have gene rows.
        gene = None
        if already_stored:
            try:
                gene = BreakpointGene.objects.get(breakpoint=record,
                                                  gene_symbol=gene_symbol)
            except BreakpointGene.DoesNotExist:
                pass
        if gene is None:
            gene = BreakpointGene()

        gene.breakpoint = record
        gene.gene_symbol = gene_symbol
        gene.cds_dist = int(cds_dist)
        gene.save()

コード例 #8

0

ファイルを表示

ファイル: load_CADD.py プロジェクト: macarthur-lab/seqr

def load_from_cadd_file(cadd_file):
    """Utility function to load scores from a CADD file.

    Reads the gzipped, tab-separated file at `cadd_file`, skips its two
    header lines, and for every remaining row sets `annotation.cadd_phred`
    on the matching record (same xpos/ref/alt) in annotator_store.variants,
    but only where that field does not exist yet. No records are inserted.

    Args:
        cadd_file: path of the gzipped CADD file.

    Raises:
        StopIteration: if the file has fewer than two lines.
        ValueError: if a row does not have exactly six tab-separated columns
            or its position is not an integer.

    NOTE(review): lines are handled as text (`rstrip('\\n')`), which matches
    Python 2's gzip.open default; under Python 3 the default mode yields
    bytes - confirm the target interpreter before porting.
    """
    # Only rows that do not have a CADD score yet are touched.
    missing_score = {'$exists': False}

    # The context manager guarantees the handle is closed even if a row fails.
    with gzip.open(cadd_file) as f:
        # skip the two header lines
        next(f)
        next(f)

        for line in tqdm.tqdm(f):
            # Chrom  Pos     Ref     Alt     RawScore        PHRED
            chrom, pos, ref, alt, _raw, phred = line.rstrip('\n').split('\t')

            xpos = genomeloc.get_xpos(chrom, int(pos))

            annotator_store.variants.update(
                {'xpos': xpos, 'ref': ref, 'alt': alt, 'annotation.cadd_phred': missing_score},
                {'$set': {'annotation.cadd_phred': phred}},
                upsert=False)

コード例 #9

0

ファイルを表示

ファイル: xbrowse_controls.py プロジェクト: macarthur-lab/seqr

def add_breakpoint_from_dict(project, bp):
    """
    Store the breakpoint described by the dict `bp` in the given project.

    The sample named in `bp` must already exist as an individual of the project.

    A breakpoint that is already stored (same project, position and individual)
    is left as it is, even when the loaded values differ; delete it first to
    reload it. Loading further samples incrementally by re-running is safe.
    The gene rows listed in `bp` are created, or re-saved when they already
    exist for a stored breakpoint.
    """

    # Fields in dict are chr     start   end     sample  depth   cscore  partner genes   cdsdist
    xpos = genomeloc.get_xpos(bp['chr'], int(bp['start']))
    sample_id = slugify(bp['sample'], separator='_')

    try:
        stored = Breakpoint.objects.get(project=project, xpos=xpos, individual__indiv_id=sample_id)
    except Breakpoint.DoesNotExist:
        was_present = False
        stored = Breakpoint()

        stored.xpos = xpos
        stored.project = project
        stored.obs = int(bp['depth'])
        stored.individual = Individual.objects.get(project=project, indiv_id=sample_id)
        stored.sample_count = int(bp['sample_count'])
        stored.partner = bp['partner']
        stored.consensus = bp['cscore']
        stored.save()
    else:
        was_present = True

    symbol_and_distance = zip(bp['genes'].split(','), bp['cdsdist'].split(','))
    for gene_symbol, cds_dist in symbol_and_distance:
        if not gene_symbol:
            continue

        # Only a previously stored breakpoint can already have gene rows.
        gene = None
        if was_present:
            try:
                gene = BreakpointGene.objects.get(breakpoint=stored,
                                                  gene_symbol=gene_symbol)
            except BreakpointGene.DoesNotExist:
                pass
        if gene is None:
            gene = BreakpointGene()

        gene.breakpoint = stored
        gene.gene_symbol = gene_symbol
        gene.cds_dist = int(cds_dist)
        gene.save()

コード例 #10

0

ファイルを表示

ファイル: generate_variant_report.py プロジェクト: mattsolo1/seqr

def get_exac_af(chrom, pos, ref, alt):
    """Look up ExAC allele frequencies for one variant.

    Scans the exac_vcf records within len(ref) + len(alt) bases of `pos`,
    reduces every alternate allele with get_minimal_representation, and keeps
    the last record/allele whose position, ref and alt equal the query.
    An error line is printed when more than one allele matches.

    Returns:
        (global_af, pop_max_af, pop_max_population), or (None, None, None)
        when no ExAC allele matches. pop_max_af stays -1 and
        pop_max_population stays None when no population has AN > 0.
    """
    populations = ['AMR', 'EAS', 'FIN', 'NFE', 'SAS', 'AFR']

    chrom_without_chr = chrom.replace("chr", "")
    xpos = genomeloc.get_single_location(chrom, pos)
    variant_length = len(ref) + len(alt)

    # check whether the alleles match
    hit = None
    hit_alt_index = None
    nearby_records = exac_vcf.fetch(chrom_without_chr, pos - variant_length, pos + variant_length)
    for record in nearby_records:
        record_xpos = genomeloc.get_xpos(record.CHROM, record.POS)
        for alt_index, record_alt in enumerate(record.ALT):
            min_xpos, min_ref, min_alt = get_minimal_representation(record_xpos, str(record.REF), str(record_alt))
            is_same_variant = min_xpos == xpos and min_ref == ref and min_alt == alt
            if is_same_variant:
                if hit is not None:
                    print("ERROR: multiple exac variants match the variant: %s %s %s %s" % (chrom, pos, ref, alt))
                hit = record
                hit_alt_index = alt_index

    if hit is None:
        return None, None, None

    # highest per-population allele frequency among populations with any calls
    pop_max_af = -1
    pop_max_population = None
    for population in populations:
        allele_number = hit.INFO['AN_' + population]
        if allele_number > 0:
            pop_af = hit.INFO['AC_' + population][hit_alt_index] / float(allele_number)
            if pop_af > pop_max_af:
                pop_max_af = pop_af
                pop_max_population = population

    adjusted_number = hit.INFO['AN_Adj']
    adjusted_count = float(hit.INFO['AC_Adj'][hit_alt_index])
    if adjusted_number == 0:
        # no adjusted calls at all, so there cannot be any adjusted alt calls
        assert adjusted_count == 0
        global_af = 0
    else:
        global_af = adjusted_count / float(adjusted_number)

    return global_af, pop_max_af, pop_max_population

コード例 #11

0

ファイルを表示

ファイル: import_variant_tags_custom1.py プロジェクト: zmcv/seqr

def add_variant_tag(row, user):
    """Apply the tag named in a spreadsheet row to the variant that row describes.

    `row` is a dict-like record. The project id is the fifth '/'-separated
    piece of the 'Linking ID' (or 'Link -> project ID') value; the family id,
    tag name, gene symbol and variant coordinate are read from their columns.

    Parsing failures, a missing project tag and family lookup errors are
    printed and make the function return early without changing anything.
    Otherwise:
      * existing tier 1 / tier 2 / "known gene for phenotype" tags on the
        variant that differ from the new tag are deleted;
      * when the new tag is already present more than once on the variant,
        the copies owned by `user` are deleted;
      * the tag is fetched or created, and a newly created tag is assigned
        to `user`.
    """
    project_link = row.get('Linking ID') or row.get('Link -> project ID')
    project_id = project_link.split('/')[4]
    family_id = (row.get('Family ID')
                 or row.get('CMG Internal Project ID(s)')).strip()
    new_tag_name = row['New Tag'].strip()
    # Not used further down, but a row without any gene column still fails here.
    gene_symbol = (row.get('Gene symbol') or row.get('Gene Name')).strip()

    try:
        hgvsc = (row.get('HGVS') or row.get('g. coordinate'))
        hgvsc = hgvsc.strip().split(",")[0]
        chrom, substitution = hgvsc.split(":")
        chrom = chrom.replace("chr", "")
        pos_ref, alt = substitution.split(">")
        pos = re.search("[0-9]+", pos_ref).group(0).strip()
        ref = re.search("[ACGT]+", pos_ref).group(0).strip()
        xpos = genomeloc.get_xpos(chrom, int(pos))
    except Exception as parse_error:
        print("Couldn't parse HGVS: %s in row %s. %s" %
              (hgvsc, row, parse_error))
        return

    try:
        project_tag = ProjectTag.objects.get(project__project_id=project_id,
                                             tag__icontains=new_tag_name)
    except ObjectDoesNotExist as lookup_error:
        print("project tag not found - %s %s: %s" %
              (project_id, new_tag_name, lookup_error))
        return

    try:
        families = get_family(family_id, project_id=project_id)
    except Exception as family_error:
        print("Unable to get family: %s %s" % (family_id, family_error))
        return

    assert len(families) == 1
    family = families[0]

    # Drop conflicting tier-style tags that are already on this variant.
    protected_keywords = ["tier 1", "tier 2", "known gene for phenotype"]
    tags_on_variant = VariantTag.objects.filter(family=family,
                                                xpos=xpos,
                                                ref=ref,
                                                alt=alt)
    for current in tags_on_variant:
        current_name = current.project_tag.tag.lower()
        if not any(word in current_name for word in protected_keywords):
            continue
        if current.project_tag.tag == project_tag.tag:
            print("Variant %s already exists in %s %s" %
                  (current, project_id, family_id))
        else:
            print("Variant %s tag will be replaced with %s" %
                  (current, project_tag.tag))
            current.delete()

    # When the same tag is present several times, remove this user's copies.
    variant_tags_by_multiple_users = VariantTag.objects.filter(
        project_tag=project_tag, family=family, xpos=xpos, ref=ref, alt=alt)
    if len(variant_tags_by_multiple_users) > 1:
        for extra in variant_tags_by_multiple_users:
            if extra.user == user:
                print("Deleting extra tag: " + str(extra))
                extra.delete()

    vt, created = VariantTag.objects.get_or_create(project_tag=project_tag,
                                                   family=family,
                                                   xpos=xpos,
                                                   ref=ref,
                                                   alt=alt)
    if not created:
        return
    vt.user = user
    vt.save()
    print("Creating tag: %s" % (vt.toJSON(), ))

コード例 #12

0

ファイルを表示

ファイル: search_gene_across_projects.py プロジェクト: xirasasa/seqr

    def search_for_genes(self,
                         gene_or_variant_ids,
                         project_id_list,
                         output_filename,
                         max_af=0.01,
                         knockouts=False,
                         in_clinvar_only=False,
                         include_non_coding=False):
        """
        Search for genes or single variants across project(s) and write the
        matching variants to a tab-delimited file (one row per variant that
        has at least one carrier genotype).

        Args:
            gene_or_variant_ids (list): gene id strings, or variant ids of the
                form 'chrom-pos' / 'chrom-pos-ref-alt'.
            project_id_list (list): ids of the projects to search; each one is
                fetched with Project.objects.get.
            output_filename (string): output file name
            max_af (float): AF filter; for gene searches, variants whose highest
                value in annotation['freqs'] is >= max_af are skipped. It is not
                applied to variant-id lookups.
            knockouts (bool): when True, gene searches use
                project_analysis.get_knockouts_in_gene instead of the default
                variant filter.
            in_clinvar_only (bool): when True, only variants whose clinvar
                significance contains "path" are written.
            include_non_coding (bool): when True (and knockouts is False), the
                so_annotations restriction of the default filter is cleared.

        NOTE(review): the output file is closed only by the final
        outfile.close(); there is no try/finally, so an exception raised while
        searching skips that call.
        """

        projects = [
            Project.objects.get(project_id=project_id)
            for project_id in project_id_list
        ]

        outfile = open(output_filename, 'w')

        header = [
            "project_id", "gene", "chr", "pos", "ref", "alt", "rsID", "filter",
            "impact", "HGVS.c", "HGVS.p", "sift", "polyphen", "muttaster",
            "fathmm", "clinvar_id", "clinvar_clinical_sig",
            "freq_1kg_wgs_phase3", "freq_1kg_wgs_phase3_popmax",
            "freq_exac_v3", "freq_exac_v3_popmax", "gnomad-exomes",
            "gnomad-genomes", "families", "all_genotypes"
        ]

        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(header)

        # all rare coding variants
        # NOTE: variant_filter is only bound when knockouts is False; it is only
        # read on the non-knockout gene-search path below.
        if not knockouts:
            variant_filter = get_default_variant_filter(
                'all_coding',
                mall.get_annotator().reference_population_slugs)
            #variant_filter.set_max_AF(max_af)
            if include_non_coding:
                variant_filter.so_annotations = []
            print("All Filters: ")
            pprint(variant_filter.toJSON())

        #print("Max AF threshold: %s" % max_af)
        print("Starting search for:\n%s\nin projects:\n%s\n" %
              (", ".join(gene_or_variant_ids), ", ".join(
                  [p.project_id for p in projects])))

        for project in projects:
            project_id = project.project_id
            # Projects whose collection is not loaded in the datastore are skipped.
            if get_project_datastore(project).project_collection_is_loaded(
                    project):
                print("=====================")
                print("Searching project %s" % project_id)
            else:
                print(
                    "Skipping project %s - gene search is not enabled for this project"
                    % project_id)
                continue

            # Per-project cache: indiv_id -> Individual, or the string 'deleted'
            # for ids that have no Individual row in this project.
            indiv_cache = {}
            for gene_or_variant_id in gene_or_variant_ids:
                # re.match anchors at the start only, so any id matching the
                # chrom-pos-ref-alt pattern also matches the chrom-pos pattern.
                chrom_pos_match = re.match("([0-9XY]{1,2})-([0-9]{1,9})",
                                           gene_or_variant_id)
                chrom_pos_ref_alt_match = re.match(
                    "([0-9XY]{1,2})-([0-9]{1,9})-([ACTG]+)-([ACTG]+)",
                    gene_or_variant_id)

                if chrom_pos_match or chrom_pos_ref_alt_match:
                    # Variant-id lookup: fetch the single variant at this position.
                    chrom = chrom_pos_match.group(1)
                    pos = int(chrom_pos_match.group(2))
                    xpos = genomeloc.get_xpos(chrom, pos)
                    ref = alt = None
                    if chrom_pos_ref_alt_match:
                        ref = chrom_pos_ref_alt_match.group(3)
                        alt = chrom_pos_ref_alt_match.group(4)

                    variant = get_project_datastore(
                        project).get_single_variant(project.project_id, None,
                                                    xpos, ref, alt)
                    if variant is None:
                        continue
                    variants = [variant]
                    print("-- searching %s for variant %s-%s-%s: found %s" %
                          (project_id, xpos, ref, alt, variant))
                    # The gene is taken from the variant's worst VEP annotation.
                    worst_annotation_idx = variant.annotation[
                        'worst_vep_annotation_index']
                    print(variant.annotation["vep_annotation"]
                          [worst_annotation_idx])
                    gene_id = variant.annotation["vep_annotation"][
                        worst_annotation_idx]['gene_id']
                    gene = get_reference().get_gene(gene_id)
                else:
                    # Gene search: resolve the id, then collect the gene's variants.
                    gene_id = get_gene_id_from_str(gene_or_variant_id,
                                                   get_reference())
                    gene = get_reference().get_gene(gene_id)
                    print("-- searching %s for gene %s (%s)" %
                          (project_id, gene["symbol"], gene_id))

                    if knockouts:
                        knockout_ids, variation = project_analysis.get_knockouts_in_gene(
                            project, gene_id)
                        variants = variation.get_relevant_variants_for_indiv_ids(
                            knockout_ids)
                    else:
                        variants = project_analysis.get_variants_in_gene(
                            project, gene_id, variant_filter=variant_filter)

                for variant in variants:
                    # The AF cutoff applies to gene searches only, not to variant-id lookups.
                    if not chrom_pos_match and not chrom_pos_ref_alt_match and max(
                            variant.annotation['freqs'].values()) >= max_af:
                        continue

                    add_extra_info_to_variants_project(get_reference(),
                                                       project, [variant])
                    worst_annotation_idx = variant.annotation[
                        "worst_vep_index_per_gene"].get(gene_id)

                    if worst_annotation_idx is not None:
                        worst_annotation = variant.annotation[
                            "vep_annotation"][worst_annotation_idx]
                    else:
                        worst_annotation = None
                    all_genotypes_list = []
                    pass_filter = "N/A"
                    family_ids = set()
                    for indiv_id, genotype in variant.genotypes.items():
                        if indiv_id in indiv_cache:
                            individual = indiv_cache[indiv_id]
                            if individual == 'deleted':
                                continue
                        else:
                            try:
                                individual = Individual.objects.get(
                                    project=project, indiv_id=indiv_id)
                                indiv_cache[indiv_id] = individual
                            except ObjectDoesNotExist:
                                # this can happen when an individual is deleted from the project - from postgres, but not from mongo
                                indiv_cache[indiv_id] = 'deleted'
                                continue
                            except MultipleObjectsReturned:
                                # when several families have an individual with the same id
                                individuals = Individual.objects.filter(
                                    project=project, indiv_id=indiv_id)
                                individual = individuals[0]
                                indiv_cache[indiv_id] = individual

                        pass_filter = genotype.filter  # filter value is stored in the genotypes even though it's the same for all individuals
                        # Only genotypes carrying at least one alt allele are reported.
                        if genotype.num_alt > 0:
                            family_ids.add(individual.family.family_id)
                            all_genotypes_list.append(
                                "%s/%s%s[gt:%s GQ:%s AB:%0.3f]" %
                                (individual.family.family_id, indiv_id,
                                 "[Affected]" if individual.affected == "A"
                                 else ("[-]" if individual.affected == "N" else
                                       "[?]"), ">".join(genotype.alleles),
                                 genotype.gq, genotype.ab
                                 if genotype.ab is not None else float('NaN')))

                    # Variants without any carrier are not written.
                    if len(all_genotypes_list) == 0:
                        continue

                    measureset_id, clinvar_significance = get_reference(
                    ).get_clinvar_info(*variant.unique_tuple())
                    if in_clinvar_only and (
                            not clinvar_significance
                            or "path" not in clinvar_significance.lower()):
                        continue

                    # One output row, in the same column order as `header`.
                    # NOTE(review): `gene` is the object returned by get_gene (indexed
                    # with ["symbol"] above), so the "gene" column receives str(gene) -
                    # confirm whether the symbol was intended.
                    row = map(str, [
                        project_id,
                        gene,
                        variant.chr,
                        variant.pos,
                        variant.ref,
                        variant.alt,
                        variant.vcf_id or "",
                        pass_filter,
                        variant.annotation.get("vep_consequence", ""),
                        worst_annotation.get("hgvsc", "")
                        if worst_annotation else "",
                        (worst_annotation.get("hgvsp", "") or "").replace(
                            "%3D", "=") if worst_annotation else "",
                        worst_annotation.get("sift", "")
                        if worst_annotation else "",
                        worst_annotation.get("polyphen", "")
                        if worst_annotation else "",
                        worst_annotation.get("mutationtaster_pred", "")
                        if worst_annotation else "",
                        ";".join(
                            set(
                                worst_annotation.get("fathmm_pred",
                                                     "").split('%3B')))
                        if worst_annotation else "",
                        measureset_id,
                        clinvar_significance,
                        variant.annotation["freqs"].get("1kg_wgs_phase3", ""),
                        variant.annotation["freqs"].get(
                            "1kg_wgs_phase3_popmax", ""),
                        variant.annotation["freqs"].get("exac_v3", ""),
                        variant.annotation["freqs"].get("exac_v3_popmax", ""),
                        variant.annotation["freqs"].get("gnomad-exomes2", ""),
                        variant.annotation["freqs"].get("gnomad-genomes2", ""),
                        ", ".join(sorted(list(family_ids))),
                        ", ".join(all_genotypes_list),
                    ])

                    writer.writerow(row)

        outfile.close()
        print("Wrote out %s" % output_filename)