def test_elasticsearch_status(self):
    """Check the payload returned by the ``elasticsearch_status`` view.

    Registers canned JSON bodies with ``urllib3_responses`` for the
    Elasticsearch ``_cat`` and mapping paths, issues a GET against the view
    and compares the returned indices, errors and disk stats with the
    module-level expectation fixtures.
    """
    status_url = reverse(elasticsearch_status)
    self.check_data_manager_login(status_url)

    # (path, canned JSON body) pairs; registration order is kept.
    canned_responses = (
        ('/_cat/allocation?format=json&h=node,shards,disk.avail,disk.used,disk.percent', ES_CAT_ALLOCATION),
        ('/_cat/nodes?format=json&h=name,heap.percent', ES_CAT_NODES),
        ('/_cat/indices?format=json&h=index,docs.count,store.size,creation.date.string', ES_CAT_INDICES),
        ('/_cat/aliases?format=json&h=alias,index', ES_CAT_ALIAS),
        ('/_all/_mapping', ES_INDEX_MAPPING),
    )
    for es_path, canned_body in canned_responses:
        urllib3_responses.add_json(es_path, canned_body)

    status_response = self.client.get(status_url)
    self.assertEqual(status_response.status_code, 200)

    body = status_response.json()
    self.assertSetEqual(
        set(body.keys()),
        {'indices', 'errors', 'diskStats', 'elasticsearchHost'})

    indices = body['indices']
    self.assertEqual(len(indices), 5)
    # Only positions 0, 3 and 4 are compared against full expected dicts.
    expected_by_position = (
        (0, TEST_INDEX_EXPECTED_DICT),
        (3, TEST_INDEX_NO_PROJECT_EXPECTED_DICT),
        (4, TEST_SV_INDEX_EXPECTED_DICT),
    )
    for position, expected_index in expected_by_position:
        self.assertDictEqual(indices[position], expected_index)

    self.assertListEqual(body['errors'], EXPECTED_ERRORS)
    self.assertListEqual(body['diskStats'], EXPECTED_DISK_ALLOCATION)
def test_delete_index(self):
    """Check the ``delete_index`` view for an in-use and an unused index.

    Posting ``test_index`` is expected to be rejected with a 403 and no
    recorded Elasticsearch call. Posting ``unused_index`` is expected to
    succeed, return the index listing, and record a DELETE as the first
    Elasticsearch call.
    """
    delete_url = reverse(delete_index)
    self.check_data_manager_login(delete_url)

    def post_index(index_name):
        # Every request in this test is a JSON body holding just the index name.
        return self.client.post(
            delete_url,
            content_type='application/json',
            data=json.dumps({'index': index_name}))

    # An index that is still in use is refused before any ES request is made.
    rejected = post_index('test_index')
    self.assertEqual(rejected.status_code, 403)
    expected_error = {
        'error': 'Index "test_index" is still used by: 1kg project n\xe5me with uni\xe7\xf8de'
    }
    self.assertDictEqual(rejected.json(), expected_error)
    self.assertEqual(len(urllib3_responses.calls), 0)

    # Canned ES responses for the successful deletion; registration order is kept.
    canned_responses = (
        ('/_cat/indices?format=json&h=index,docs.count,store.size,creation.date.string', ES_CAT_INDICES),
        ('/_cat/aliases?format=json&h=alias,index', ES_CAT_ALIAS),
        ('/_all/_mapping', ES_INDEX_MAPPING),
    )
    for es_path, canned_body in canned_responses:
        urllib3_responses.add_json(es_path, canned_body)
    urllib3_responses.add(urllib3_responses.DELETE, '/unused_index')

    accepted = post_index('unused_index')
    self.assertEqual(accepted.status_code, 200)

    body = accepted.json()
    self.assertSetEqual(set(body.keys()), {'indices'})

    indices = body['indices']
    self.assertEqual(len(indices), 5)
    expected_by_position = (
        (0, TEST_INDEX_EXPECTED_DICT),
        (3, TEST_INDEX_NO_PROJECT_EXPECTED_DICT),
        (4, TEST_SV_INDEX_EXPECTED_DICT),
    )
    for position, expected_index in expected_by_position:
        self.assertDictEqual(indices[position], expected_index)

    first_call = urllib3_responses.calls[0]
    self.assertEqual(first_call.request.method, 'DELETE')
def test_add_variants_dataset(self, mock_open, mock_random, mock_redis):
    """Exercise the add-variants-dataset endpoint against canned ES responses.

    The test runs in three phases:

    1. Invalid requests: each is expected to return HTTP 400 with one
       specific error message while the canned index mapping / sample
       aggregation is changed step by step.
    2. A valid request that uses a sample ID mapping file: checks the
       response payload, the calls made on the ``mock_open`` / ``mock_redis``
       mocks and the resulting ``Sample`` rows.
    3. Two further valid requests (an SV index, then an index with a
       different sample type): each is expected to add exactly one sample
       for individual ``I000001_na19675`` and leave earlier samples active.

    Args:
        mock_open: mock whose context-manager result is iterated to supply the
            mapping file lines; asserted to be called with
            ``('mapping.csv', 'r')``.
        mock_random: mock whose ``return_value`` ends up in the GUIDs of newly
            created samples.
        mock_redis: mock whose ``return_value.get`` / ``.set`` calls are
            asserted for the index metadata cache key.

    NOTE(review): the three mocks are presumably injected by ``mock.patch``
    decorators outside this block - confirm the patch targets there.
    """
    mock_file_iter = mock_open.return_value.__enter__.return_value.__iter__

    url = reverse(add_variants_dataset_handler, args=[PROJECT_GUID])
    self.check_data_manager_login(url)

    individual_guid = 'I000001_na19675'
    response_keys = {'samplesByGuid', 'individualsByGuid', 'familiesByGuid'}
    mapping_url = '/{}/_mapping'.format(INDEX_NAME)
    search_url = '/{}/_search?size=0'.format(INDEX_NAME)

    def post(body):
        # All requests are JSON posts to the same URL; `body` is already serialized.
        return self.client.post(url, content_type='application/json', data=body)

    def assert_request_errors(body, errors):
        # Post `body` and require a 400 response carrying exactly `errors`.
        response = post(body)
        self.assertEqual(response.status_code, 400)
        self.assertDictEqual(response.json(), {'errors': errors})

    def index_mapping(index_name, properties, meta=None):
        # Canned `_mapping` body. '_meta' is inserted before 'properties' so
        # the key order matches a hand-written literal.
        mappings = {}
        if meta is not None:
            mappings['_meta'] = meta
        mappings['properties'] = properties
        return {index_name: {'mappings': mappings}}

    def sample_id_aggregation(*sample_ids):
        # Canned `_search` body with one aggregation bucket per sample id.
        buckets = [{'key': sample_id} for sample_id in sample_ids]
        return {'aggregations': {'sample_ids': {'buckets': buckets}}}

    def expected_sample_query(field):
        # Aggregation request body expected to be sent to ES for `field`.
        return {'aggs': {'sample_ids': {'terms': {'field': field, 'size': 10000}}}}

    def assert_single_sample_added(response_json, sample_guid, dataset_type, sample_type, individual_sample_guids):
        # Response contains only the new sample, attached to the one individual...
        self.assertSetEqual(set(response_json.keys()), response_keys)
        self.assertDictEqual(response_json['familiesByGuid'], {})
        samples_by_guid = response_json['samplesByGuid']
        self.assertListEqual(list(samples_by_guid.keys()), [sample_guid])
        self.assertEqual(samples_by_guid[sample_guid]['datasetType'], dataset_type)
        self.assertEqual(samples_by_guid[sample_guid]['sampleType'], sample_type)
        self.assertTrue(samples_by_guid[sample_guid]['isActive'])
        individuals_by_guid = response_json['individualsByGuid']
        self.assertListEqual(list(individuals_by_guid.keys()), [individual_guid])
        self.assertListEqual(list(individuals_by_guid[individual_guid].keys()), ['sampleGuids'])
        self.assertSetEqual(
            set(individuals_by_guid[individual_guid]['sampleGuids']), individual_sample_guids)

        # ...and every sample of that individual is still active in the DB.
        sample_models = Sample.objects.filter(individual__guid=individual_guid)
        self.assertEqual(len(sample_models), len(individual_sample_guids))
        self.assertSetEqual(individual_sample_guids, {sample.guid for sample in sample_models})
        self.assertSetEqual({True}, {sample.is_active for sample in sample_models})

    # Confirm test DB is as expected
    existing_index_sample = Sample.objects.get(sample_id='NA19675')
    self.assertEqual(existing_index_sample.elasticsearch_index, INDEX_NAME)
    self.assertTrue(existing_index_sample.is_active)
    existing_index_sample_guid = existing_index_sample.guid

    existing_old_index_sample = Sample.objects.get(guid='S000130_na19678')
    self.assertNotEqual(existing_old_index_sample.elasticsearch_index, INDEX_NAME)
    self.assertTrue(existing_old_index_sample.is_active)
    existing_old_index_sample_guid = existing_old_index_sample.guid

    existing_sample = Sample.objects.get(sample_id='NA19679')
    self.assertEqual(existing_sample.elasticsearch_index, INDEX_NAME)
    self.assertFalse(existing_sample.is_active)
    existing_sample_guid = existing_sample.guid

    self.assertEqual(Sample.objects.filter(sample_id='NA19678_1').count(), 0)

    mock_random.return_value = 98765432101234567890

    # Start with a mapping that has no '_meta' and an index with no samples.
    urllib3_responses.add_json(
        mapping_url, index_mapping(INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1))
    urllib3_responses.add_json(
        search_url, sample_id_aggregation(), method=urllib3_responses.POST)

    # Send invalid requests
    assert_request_errors(
        json.dumps({}),
        ['request must contain fields: elasticsearchIndex, datasetType'])

    assert_request_errors(
        json.dumps({'elasticsearchIndex': INDEX_NAME, 'datasetType': 'NOT_A_TYPE'}),
        ['Invalid dataset type "NOT_A_TYPE"'])

    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['No samples found in the index. Make sure the specified caller type is correct'])
    self.assertEqual(len(urllib3_responses.calls), 2)
    self.assertDictEqual(
        urllib3_responses.call_request_json(), expected_sample_query('samples_num_alt_1'))

    urllib3_responses.replace_json(
        search_url, sample_id_aggregation('NA19679', 'NA19678_1'), method=urllib3_responses.POST)
    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['Index metadata must contain fields: genomeVersion, sampleType, sourceFilePath'])

    urllib3_responses.replace_json(mapping_url, index_mapping(
        INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1, meta={
            'sampleType': 'NOT_A_TYPE',
            'genomeVersion': '37',
            'sourceFilePath': 'invalidpath.txt',
        }))
    assert_request_errors(ADD_DATASET_PAYLOAD, ['Sample type not supported: NOT_A_TYPE'])

    urllib3_responses.replace_json(mapping_url, index_mapping(
        INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1, meta={
            'sampleType': 'WES',
            'genomeVersion': '38',
            'sourceFilePath': 'invalidpath.txt',
        }))
    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['Index "test_index" has genome version 38 but this project uses version 37'])

    urllib3_responses.replace_json(mapping_url, index_mapping(
        INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1, meta={
            'sampleType': 'WES',
            'genomeVersion': '37',
            'sourceFilePath': 'invalidpath.txt',
        }))
    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['Variant call dataset path must end with .vds or .vcf.gz or .bgz or .bed'])

    urllib3_responses.replace_json(mapping_url, index_mapping(
        INDEX_NAME, MAPPING_PROPS_WITH_SAMPLES, meta={
            'sampleType': 'WES',
            'genomeVersion': '37',
            'sourceFilePath': 'test_data.vds',
            'datasetType': 'SV',
        }))
    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['Index "test_index" has dataset type SV but expects VARIANTS'])

    urllib3_responses.replace_json(mapping_url, index_mapping(
        INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1, meta={
            'sampleType': 'WES',
            'genomeVersion': '37',
            'sourceFilePath': 'test_data.vds',
        }))
    assert_request_errors(
        ADD_DATASET_PAYLOAD,
        ['Matches not found for ES sample ids: NA19678_1. Uploading a mapping file for these samples, or select the "Ignore extra samples in callset" checkbox to ignore.'])

    ignore_extra_samples_request = json.dumps({
        'elasticsearchIndex': INDEX_NAME,
        'datasetType': 'VARIANTS',
        'ignoreExtraSamplesInCallset': True,
    })
    assert_request_errors(
        ignore_extra_samples_request,
        ['The following families are included in the callset but are missing some family members: 1 (NA19675_1, NA19678).'])

    urllib3_responses.replace_json(
        search_url, sample_id_aggregation('NA19673'), method=urllib3_responses.POST)
    assert_request_errors(
        ignore_extra_samples_request,
        ['None of the individuals or samples in the project matched the 1 expected sample id(s)'])

    mapping_file_request = json.dumps({
        'elasticsearchIndex': INDEX_NAME,
        'mappingFilePath': 'mapping.csv',
        'datasetType': 'VARIANTS',
    })
    # A mapping file row with three columns is rejected.
    mock_file_iter.return_value = StringIO('NA19678_1,NA19678,metadata\n')
    assert_request_errors(
        mapping_file_request,
        ['Must contain 2 columns: NA19678_1, NA19678, metadata'])

    # Send valid request
    urllib3_responses.replace_json(
        search_url,
        sample_id_aggregation('NA19675', 'NA19679', 'NA19678_1'),
        method=urllib3_responses.POST)
    mock_file_iter.return_value = StringIO('NA19678_1,NA19678\n')
    response = post(mapping_file_request)
    self.assertEqual(response.status_code, 200)

    mock_open.assert_called_with('mapping.csv', 'r')
    mock_redis.return_value.get.assert_called_with('index_metadata__test_index')
    mock_redis.return_value.set.assert_called_with(
        'index_metadata__test_index',
        '{"test_index": {"sampleType": "WES", "genomeVersion": "37", "sourceFilePath": "test_data.vds", "fields": {"samples_num_alt_1": "keyword"}}}'
    )

    response_json = response.json()
    self.assertSetEqual(set(response_json.keys()), response_keys)
    samples_by_guid = response_json['samplesByGuid']
    individuals_by_guid = response_json['individualsByGuid']

    new_sample_guid = 'S98765432101234567890_NA19678_'
    self.assertSetEqual(
        set(samples_by_guid.keys()),
        {existing_index_sample_guid, existing_sample_guid, existing_old_index_sample_guid, new_sample_guid})
    self.assertDictEqual(individuals_by_guid, {
        'I000001_na19675': {'sampleGuids': [existing_index_sample_guid]},
        # List order is not pinned here; the membership is checked just below.
        'I000002_na19678': {'sampleGuids': mock.ANY},
        'I000003_na19679': {'sampleGuids': [existing_sample_guid]},
    })
    self.assertSetEqual(
        set(individuals_by_guid['I000002_na19678']['sampleGuids']),
        {new_sample_guid, existing_old_index_sample_guid})
    self.assertDictEqual(
        response_json['familiesByGuid'], {'F000001_1': {'analysisStatus': 'I'}})

    updated_samples = [
        sample for sample_guid, sample in samples_by_guid.items()
        if sample_guid != existing_old_index_sample_guid
    ]
    self.assertSetEqual({'WES'}, {sample['sampleType'] for sample in updated_samples})
    self.assertSetEqual({True}, {sample['isActive'] for sample in updated_samples})
    self.assertDictEqual(samples_by_guid[existing_old_index_sample_guid], {'isActive': False})

    # Only the new/updated samples should have an updated loaded date
    self.assertTrue(
        samples_by_guid[existing_index_sample_guid]['loadedDate'].startswith('2017-02-05'))
    # NOTE(review): the date is read after the request completes, so a run that
    # straddles midnight could fail spuriously - confirm whether that matters.
    today = datetime.now().strftime('%Y-%m-%d')
    self.assertTrue(samples_by_guid[existing_sample_guid]['loadedDate'].startswith(today))
    self.assertTrue(samples_by_guid[new_sample_guid]['loadedDate'].startswith(today))

    updated_sample_models = Sample.objects.filter(
        guid__in=[sample['sampleGuid'] for sample in updated_samples])
    self.assertEqual(len(updated_sample_models), 3)
    self.assertSetEqual(
        {INDEX_NAME}, {sample.elasticsearch_index for sample in updated_sample_models})

    # Adding an SV index works additively with the regular variants index
    mock_random.return_value = 1234567
    urllib3_responses.add_json(
        '/{}/_mapping'.format(SV_INDEX_NAME),
        index_mapping(SV_INDEX_NAME, MAPPING_PROPS_WITH_SAMPLES, meta={
            'sampleType': 'WES',
            'genomeVersion': '37',
            'sourceFilePath': 'test_data.bed',
            'datasetType': 'SV',
        }))
    urllib3_responses.add_json(
        '/{}/_search?size=0'.format(SV_INDEX_NAME),
        sample_id_aggregation('NA19675_1'),
        method=urllib3_responses.POST)

    response = post(json.dumps({
        'elasticsearchIndex': SV_INDEX_NAME,
        'datasetType': 'SV',
    }))
    self.assertEqual(response.status_code, 200)
    self.assertDictEqual(
        urllib3_responses.call_request_json(), expected_sample_query('samples'))

    sv_sample_guid = 'S1234567_NA19675_1'
    # Regular variant sample should still be active
    assert_single_sample_added(
        response.json(), sv_sample_guid, 'SV', 'WES',
        {sv_sample_guid, existing_index_sample_guid})

    # Adding an index for a different sample type works additively
    mock_random.return_value = 987654
    urllib3_responses.add_json(
        '/{}/_mapping'.format(NEW_SAMPLE_TYPE_INDEX_NAME),
        index_mapping(NEW_SAMPLE_TYPE_INDEX_NAME, MAPPING_PROPS_SAMPLES_NUM_ALT_1, meta={
            'sampleType': 'WGS',
            'genomeVersion': '37',
            'sourceFilePath': 'test_data.vds',
        }))
    urllib3_responses.add_json(
        '/{}/_search?size=0'.format(NEW_SAMPLE_TYPE_INDEX_NAME),
        sample_id_aggregation('NA19675_1'),
        method=urllib3_responses.POST)

    response = post(json.dumps({
        'elasticsearchIndex': NEW_SAMPLE_TYPE_INDEX_NAME,
        'datasetType': 'VARIANTS',
    }))
    self.assertEqual(response.status_code, 200)

    new_sample_type_sample_guid = 'S987654_NA19675_1'
    # Previous variant samples should still be active
    assert_single_sample_added(
        response.json(), new_sample_type_sample_guid, 'VARIANTS', 'WGS',
        {sv_sample_guid, existing_index_sample_guid, new_sample_type_sample_guid})