Example #1
0
    def test_query_single_variant(self, mock_get_variant):
        """Check the single-variant query endpoint for a found variant and a failed lookup.

        ``mock_get_variant`` is the mock supplied to this test (its patch target
        is outside this view). It first returns ``VARIANTS[0]`` and is then
        switched to raise ``InvalidSearchException`` so the 400 response is
        covered as well.
        """
        mock_get_variant.return_value = VARIANTS[0]

        handler_path = reverse(query_single_variant_handler, args=['21-3343353-GAGA-G'])
        url = '{}?familyGuid=F000001_1'.format(handler_path)
        self.check_collaborator_login(url)

        # Successful lookup: the response carries the variant plus its related entities.
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        response_json = response.json()
        expected_keys = {
            'searchedVariants', 'savedVariantsByGuid', 'genesById', 'projectsByGuid', 'familiesByGuid',
            'individualsByGuid', 'samplesByGuid', 'locusListsByGuid', 'analysisGroupsByGuid', 'variantTagsByGuid',
            'variantNotesByGuid', 'variantFunctionalDataByGuid', 'igvSamplesByGuid',
        }
        self.assertSetEqual(set(response_json.keys()), expected_keys)

        self.assertListEqual(response_json['searchedVariants'], VARIANTS[:1])
        self.assertSetEqual(set(response_json['savedVariantsByGuid'].keys()), {'SV0000001_2103343353_r0390_100'})
        self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000227232', 'ENSG00000268903'})
        # assertIn reports the missing key and the container on failure,
        # unlike assertTrue(... in ...), which only reports "False is not true".
        self.assertIn('F000001_1', response_json['familiesByGuid'])

        # Failed lookup: the exception message is returned as a 400 error.
        mock_get_variant.side_effect = InvalidSearchException('Variant not found')
        response = self.client.get(url)
        self.assertEqual(response.status_code, 400)
        self.assertEqual(response.json()['error'], 'Variant not found')
Example #2
0
    def _parse_response(self, response):
        """Aggregate per-gene variant counts from a gene-aggregation search response.

        Args:
            response: search response exposing ``aggregations.genes.buckets``.
                Each bucket provides ``key`` (used as the gene id) and
                ``doc_count``, plus either a ``vars_by_gene`` collection of hits
                or one ``buckets`` sub-aggregation per ``HAS_ALT_FIELD_KEYS`` entry.

        Returns:
            A ``defaultdict`` keyed by gene id. Each value is a dict with
            ``total`` (int), ``families`` (``defaultdict(int)`` keyed by family
            guid) and ``sample_ids`` (set).

        Raises:
            InvalidSearchException: if the number of gene buckets exceeds
                ``MAX_COMPOUND_HET_GENES``.
        """
        gene_buckets = response.aggregations.genes.buckets
        if len(gene_buckets) > MAX_COMPOUND_HET_GENES:
            # Imported locally, as in the original code.
            # NOTE(review): presumably to avoid a circular import - confirm.
            from seqr.utils.elasticsearch.utils import InvalidSearchException
            raise InvalidSearchException('This search returned too many genes')

        gene_counts = defaultdict(lambda: {'total': 0, 'families': defaultdict(int), 'sample_ids': set()})

        # The sample -> family lookup does not depend on the gene being processed, so it is built
        # at most once (on first need) instead of being rebuilt for every gene bucket.
        families_by_sample = None

        for gene_agg in gene_buckets:
            counts = gene_counts[gene_agg['key']]
            counts['total'] += gene_agg['doc_count']

            if 'vars_by_gene' in gene_agg:
                for hit in gene_agg['vars_by_gene']:
                    counts['sample_ids'].add(hit.meta.id)
                    for family_guid in hit.meta.matched_queries:
                        counts['families'][family_guid] += 1
                continue

            if families_by_sample is None:
                families_by_sample = {}
                for index_samples_by_family in self.samples_by_family_index.values():
                    for family_guid, samples_by_id in index_samples_by_family.items():
                        for sample_id in samples_by_id.keys():
                            families_by_sample[sample_id] = family_guid

            for key in HAS_ALT_FIELD_KEYS:
                for sample_agg in gene_agg[key]['buckets']:
                    sample_id = sample_agg['key']
                    family_guid = families_by_sample.get(sample_id)
                    if family_guid:
                        counts['families'][family_guid] += sample_agg['doc_count']
                        counts['sample_ids'].add(sample_id)
                    else:
                        # samples may be returned that are not part of the searched families if they have no
                        # affected individuals and were removed from the "any affected" search.
                        counts['total'] -= sample_agg['doc_count']

        return gene_counts
Example #3
0
    def test_query_variants(self, mock_get_variants, mock_get_gene_counts,
                            mock_error_logger, mock_analyst_group):
        """Walk the variant search endpoints through error handling and result rendering.

        The steps run in order and share mock state: invalid requests, errors
        raised by the search call, a new search, pagination, sorting, export,
        the gene breakdown endpoint, compound-het results, cross-project
        discovery for analyst users, and finally an empty result set.
        """
        login_check_url = reverse(query_variants_handler, args=['abc'])
        self.check_collaborator_login(
            login_check_url, request_data={'projectFamilies': PROJECT_FAMILIES})
        url = reverse(query_variants_handler, args=[SEARCH_HASH])

        def post_json(payload):
            # Every POST in this test sends a JSON body to the search URL.
            return self.client.post(
                url, content_type='application/json', data=json.dumps(payload))

        def post_search():
            # The standard, fully specified search request.
            return post_json({
                'projectFamilies': PROJECT_FAMILIES,
                'search': SEARCH,
            })

        # Attach a locus list to the project
        project = Project.objects.get(guid=PROJECT_GUID)
        LocusList.objects.get(guid=LOCUS_LIST_GUID).projects.add(project)

        # Unknown search hash requested without a body
        response = self.client.get(url)
        self.assertEqual(response.status_code, 400)
        self.assertEqual(
            response.reason_phrase,
            'Invalid search hash: {}'.format(SEARCH_HASH))
        mock_error_logger.assert_not_called()

        # Search body without any projects/families
        response = post_json({'search': SEARCH})
        self.assertEqual(response.status_code, 400)
        self.assertEqual(
            response.reason_phrase,
            'Invalid search: no projects/ families specified')
        mock_error_logger.assert_not_called()

        # Invalid index errors are reported to the error logger
        mock_get_variants.side_effect = InvalidIndexException('Invalid index')
        response = post_search()
        self.assertEqual(response.status_code, 400)
        self.assertEqual(response.json()['error'], 'Invalid index')
        mock_error_logger.assert_called_with('Invalid index', extra=mock.ANY)

        # Invalid search errors are not logged
        mock_get_variants.side_effect = InvalidSearchException('Invalid search')
        mock_error_logger.reset_mock()
        response = post_search()
        self.assertEqual(response.status_code, 400)
        self.assertEqual(response.json()['error'], 'Invalid search')
        mock_error_logger.assert_not_called()

        # Connection timeouts map to a 504
        mock_get_variants.side_effect = ConnectionTimeout(
            '', '', ValueError('Timeout'))
        response = post_search()
        self.assertEqual(response.status_code, 504)
        self.assertEqual(
            response.json()['error'],
            'ConnectionTimeout caused by - ValueError(Timeout)')
        mock_error_logger.assert_not_called()

        # Transport error without a usable status code
        mock_get_variants.side_effect = TransportError(
            'N/A', 'search_phase_execution_exception', {'error': 'Invalid'})
        response = post_search()
        self.assertEqual(response.status_code, 400)
        self.assertEqual(
            response.json()['error'],
            "TransportError: N/A - 'search_phase_execution_exception' - 'Invalid'"
        )
        self.assertEqual(response.json()['detail'], {'error': 'Invalid'})
        mock_error_logger.assert_not_called()

        # Transport error carrying a status code and a root cause
        root_cause = {'type': 'response_handler_failure_transport_exception'}
        error_info_json = {'error': {'root_cause': [root_cause]}}
        mock_get_variants.side_effect = TransportError(
            '401', 'search_phase_execution_exception', error_info_json)
        response = post_search()
        self.assertEqual(response.status_code, 401)
        self.assertEqual(
            response.json()['error'],
            "TransportError: 401 - 'search_phase_execution_exception' - response_handler_failure_transport_exception"
        )
        self.assertEqual(response.json()['detail'], error_info_json)
        mock_error_logger.assert_not_called()

        mock_get_variants.side_effect = _get_es_variants

        # New search
        search_response_keys = {
            'searchedVariants', 'savedVariantsByGuid', 'genesById',
            'search', 'variantTagsByGuid', 'variantNotesByGuid',
            'variantFunctionalDataByGuid', 'locusListsByGuid',
        }
        response = post_search()
        self.assertEqual(response.status_code, 200)
        response_json = response.json()
        self.assertSetEqual(set(response_json.keys()), search_response_keys)
        self.assertListEqual(response_json['searchedVariants'], VARIANTS)
        expected_search = {
            'search': SEARCH,
            'projectFamilies': [{
                'projectGuid': PROJECT_GUID,
                'familyGuids': mock.ANY,
            }],
            'totalResults': 3,
        }
        self.assertDictEqual(response_json['search'], expected_search)
        returned_family_guids = response_json['search']['projectFamilies'][0]['familyGuids']
        self.assertSetEqual(
            set(returned_family_guids), {'F000001_1', 'F000002_2'})
        self.assertSetEqual(
            set(response_json['savedVariantsByGuid'].keys()),
            {'SV0000001_2103343353_r0390_100', 'SV0000002_1248367227_r0390_100'})
        self.assertSetEqual(
            set(response_json['genesById'].keys()),
            {'ENSG00000227232', 'ENSG00000268903', 'ENSG00000233653'})
        gene_fields = {'locusListGuids'} | set(GENE_VARIANT_FIELDS)
        searched_gene = response_json['genesById']['ENSG00000227232']
        self.assertSetEqual(set(searched_gene.keys()), gene_fields)
        self.assertListEqual(
            searched_gene['locusListGuids'], [LOCUS_LIST_GUID])
        self.assertSetEqual(
            set(response_json['locusListsByGuid'].keys()), {LOCUS_LIST_GUID})
        locus_list = response_json['locusListsByGuid'][LOCUS_LIST_GUID]
        intervals = locus_list['intervals']
        self.assertEqual(len(intervals), 2)
        self.assertSetEqual(
            set(intervals[0].keys()),
            {'locusListGuid', 'locusListIntervalGuid', 'genomeVersion',
             'chrom', 'start', 'end'})

        results_model = VariantSearchResults.objects.get(
            search_hash=SEARCH_HASH)

        def assert_variants_queried(sort, page):
            # Check the most recent search call, and that nothing was logged as an error.
            mock_get_variants.assert_called_with(
                results_model, sort=sort, page=page, num_results=100,
                skip_genotype_filter=False)
            mock_error_logger.assert_not_called()

        assert_variants_queried('xpos', 1)

        # Pagination
        response = self.client.get('{}?page=3'.format(url))
        self.assertEqual(response.status_code, 200)
        assert_variants_queried('xpos', 3)

        # Sorting
        response = self.client.get('{}?sort=pathogenicity'.format(url))
        self.assertEqual(response.status_code, 200)
        assert_variants_queried('pathogenicity', 1)

        # Export
        export_url = reverse(export_variants_handler, args=[SEARCH_HASH])
        response = self.client.get(export_url)
        self.assertEqual(response.status_code, 200)
        export_header = [
            'chrom', 'pos', 'ref', 'alt', 'gene', 'worst_consequence',
            '1kg_freq', 'exac_freq', 'gnomad_genomes_freq',
            'gnomad_exomes_freq', 'topmed_freq', 'cadd', 'revel', 'eigen',
            'polyphen', 'sift', 'muttaster', 'fathmm', 'rsid', 'hgvsc',
            'hgvsp', 'clinvar_clinical_significance', 'clinvar_gold_stars',
            'filter', 'family_id_1', 'tags_1', 'notes_1', 'family_id_2',
            'tags_2', 'notes_2', 'sample_1', 'num_alt_alleles_1', 'gq_1',
            'ab_1', 'sample_2', 'num_alt_alleles_2', 'gq_2', 'ab_2'
        ]
        export_rows = [
            [
                '21', '3343400', 'GAGA', 'G', 'WASH7P', 'missense_variant', '',
                '', '', '', '', '', '', '', '', '', '', '', '',
                'ENST00000623083.3:c.1075G>A', 'ENSP00000485442.1:p.Gly359Ser',
                '', '', '', '1',
                'Tier 1 - Novel gene and phenotype (None)|Review (None)', '',
                '2', '', '', 'NA19675', '1', '46.0', '0.702127659574',
                'NA19679', '0', '99.0', '0.0'
            ],
            [
                '3', '835', 'AAAG', 'A', '', '', '', '', '', '', '', '', '',
                '', '', '', '', '', '', '', '', '', '', '', '1', '', '', '',
                '', '', 'NA19679', '0', '99.0', '0.0', '', '', '', ''
            ],
            [
                '12', '48367227', 'TC', 'T', '', '', '', '', '', '', '', '',
                '', '', '', '', '', '', '', '', '', '', '', '', '2',
                'Known gene for phenotype (None)|Excluded (None)',
                'test n\xf8te (None)', '', '', '', '', '', '', '', '', '', '',
                ''
            ],
        ]
        # Tab-separated columns, one newline-terminated line per row.
        expected_export = ''.join(
            '\t'.join(row) + '\n' for row in [export_header] + export_rows)
        self.assertEqual(response.content, expected_export.encode('utf-8'))

        mock_get_variants.assert_called_with(
            results_model, page=1, load_all=True)
        mock_error_logger.assert_not_called()

        # Gene breakdown
        gene_counts = {
            'ENSG00000227232': {
                'total': 2,
                'families': {'F000001_1': 2, 'F000002_2': 1},
            },
            'ENSG00000268903': {
                'total': 1,
                'families': {'F000002_2': 1},
            },
        }
        mock_get_gene_counts.return_value = gene_counts

        gene_breakdown_url = reverse(
            get_variant_gene_breakdown, args=[SEARCH_HASH])
        response = self.client.get(gene_breakdown_url)
        self.assertEqual(response.status_code, 200)
        response_json = response.json()
        self.assertSetEqual(
            set(response_json.keys()), {'searchGeneBreakdown', 'genesById'})
        self.assertDictEqual(
            response_json['searchGeneBreakdown'], {SEARCH_HASH: gene_counts})
        self.assertSetEqual(
            set(response_json['genesById'].keys()),
            {'ENSG00000227232', 'ENSG00000268903'})
        gene_fields = {
            'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity',
        } | set(GENE_FIELDS)
        self.assertSetEqual(
            set(response_json['genesById']['ENSG00000227232'].keys()),
            gene_fields)

        # Compound hets
        mock_get_variants.side_effect = _get_compound_het_es_variants
        response = post_search()
        self.assertEqual(response.status_code, 200)
        response_json = response.json()
        self.assertSetEqual(set(response_json.keys()), search_response_keys)
        self.assertListEqual(
            response_json['searchedVariants'], COMP_HET_VARAINTS)
        self.assertSetEqual(
            set(response_json['savedVariantsByGuid'].keys()),
            {'SV0000002_1248367227_r0390_100'})
        self.assertSetEqual(
            set(response_json['genesById'].keys()), {'ENSG00000233653'})
        mock_error_logger.assert_not_called()

        # Cross-project discovery for analyst users
        pathogenicity_url = '{}?sort=pathogenicity'.format(url)
        self.login_analyst_user()
        mock_get_variants.side_effect = _get_es_variants
        response = self.client.get(pathogenicity_url)
        self.assertEqual(response.status_code, 403)

        mock_analyst_group.__bool__.return_value = True
        mock_analyst_group.resolve_expression.return_value = 'analysts'
        response = self.client.get(pathogenicity_url)
        self.assertEqual(response.status_code, 200)
        response_json = response.json()
        self.assertSetEqual(
            set(response_json.keys()),
            search_response_keys | {'familiesByGuid'})

        self.assertListEqual(
            response_json['searchedVariants'], VARIANTS_WITH_DISCOVERY_TAGS)
        self.assertSetEqual(
            set(response_json['familiesByGuid'].keys()), {'F000011_11'})
        assert_variants_queried('pathogenicity_hgmd', 1)

        # No results
        mock_get_variants.side_effect = _get_empty_es_variants
        response = post_search()
        self.assertEqual(response.status_code, 200)
        expected_empty_response = {
            'searchedVariants': [],
            'search': {
                'search': SEARCH,
                'projectFamilies': PROJECT_FAMILIES,
                'totalResults': 0,
            },
        }
        self.assertDictEqual(response.json(), expected_empty_response)
        mock_error_logger.assert_not_called()