def test_start_diagnostics_job_error(mock_uuid):
    """A 507 reply from the bundle-creation endpoint surfaces as HTTPError."""
    # Leader answers the creation PUT with an out-of-disk style error.
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        json={
            'code': 507,
            'error': 'could not create bundle f053c58c-b9ce-11e9-8c5b-38d54714bf36 workdir',
        },
        status=507)

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)
    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    with TestCase().assertRaises(HTTPError):
        response = diagnostics.start_diagnostics_job()
        check_json(response)
def test_start_diagnostics_job(mock_uuid):
    """A successful bundle-creation PUT returns the job description as JSON."""
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        json={
            'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36',
            'status': 'Started',
            'started_at': '2019-08-05T11:31:53.238640571Z',
        })

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)
    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    response = diagnostics.start_diagnostics_job()
    assert check_json(response) == {
        'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        'status': 'Started',
        'started_at': '2019-08-05T11:31:53.238640571Z',
    }
def test_download_reports():
    """Bundle bytes served by a master are written verbatim to disk."""
    bundle = 'f053c58c-b9ce-11e9-8c5b-38d54714bf36'
    responses.add(
        responses.GET,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36/file',
        content_type='application/zip',
        body='OK')

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)
    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=['leader.mesos'],
        all_slaves=[],
        session=api_session.copy().session,
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        diagnostics.download_diagnostics_reports([bundle], tmpdirname)
        with open(os.path.join(tmpdirname, bundle), 'r') as f:
            assert f.read() == 'OK'
def test_get_reports():
    """get_diagnostics_reports() lists bundle ids reported by the leader."""
    responses.add(
        responses.GET,
        'http://leader.mesos/system/health/v1/diagnostics',
        json=[
            {'id': '123e4567-e89b-12d3-a456-426655440000', 'status': 'Started'},
            {'id': '123e4567-e89b-12d3-a456-426655440000', 'status': 'Deleted'},
            {'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36', 'status': 'Done'},
        ])

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)
    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    # An id that appears under several statuses is expected exactly once.
    assert diagnostics.get_diagnostics_reports() == [
        '123e4567-e89b-12d3-a456-426655440000',
        'f053c58c-b9ce-11e9-8c5b-38d54714bf36',
    ]
def _delete_bundle(diagnostics: Diagnostics, bundle: str) -> None:
    """Delete *bundle* via the diagnostics client and verify it disappears."""
    existing = diagnostics.get_diagnostics_reports()
    assert bundle in existing, 'not found {} in {}'.format(bundle, existing)

    diagnostics.delete_bundle(bundle)

    remaining = diagnostics.get_diagnostics_reports()
    assert bundle not in remaining, 'found {} in {}'.format(bundle, remaining)
def _create_bundle(dcos_api_session):
    """Start a diagnostics job, wait for completion, return the bundle name."""
    last_datapoint = {
        'time': None,
        'value': 0,
    }
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    job_info = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    reports = diagnostics.get_diagnostics_reports()
    assert len(reports) == 1, 'bundle file not found'
    assert reports[0] == job_info['extra']['bundle_name']
    return job_info['extra']['bundle_name']
def _dump_diagnostics(request, dcos_api_session):
    """Download the zipped diagnostics bundle report from each master in the
    cluster to the home directory. This should be run last. The _ prefix
    makes sure that pytest calls this first out of the autouse session scope
    fixtures, which means that its post-yield code will be executed last.

    * There is no official way to ensure fixtures are called in a certain order
      https://github.com/pytest-dev/pytest/issues/1216
    * However it seems that fixtures at the same scope are called alphabetically
      https://stackoverflow.com/a/28593102/1436300
    """
    yield

    # Only collect a bundle when the caller asked for one via the env var.
    if os.environ.get('DIAGNOSTICS_DIRECTORY') is None:
        log.info('\nNot downloading diagnostics bundle for this session.')
        return

    creation_start = datetime.datetime.now()
    last_datapoint = {
        'time': None,
        'value': 0,
    }
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    log.info('Create diagnostics report for all nodes')
    diagnostics.start_diagnostics_job()

    log.info('\nWait for diagnostics job to complete')
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    duration = last_datapoint['time'] - creation_start
    log.info('\nDiagnostis bundle took {} to generate'.format(duration))

    log.info('\nWait for diagnostics report to become available')
    diagnostics.wait_for_diagnostics_reports()

    log.info('\nDownload zipped diagnostics reports')
    bundles = diagnostics.get_diagnostics_reports()
    diagnostics.download_diagnostics_reports(diagnostics_bundles=bundles)
def test_dcos_diagnostics_bundle_create_download_delete(
        dcos_api_session: DcosApiSession, use_legacy_api: bool) -> None:
    """ test bundle create, read, delete workflow """
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
        use_legacy_api=use_legacy_api,
    )

    # Deploy a short-lived app so the bundle is taken on a busy cluster;
    # deploy_and_cleanup tears it down afterwards.
    app, test_uuid = test_helpers.marathon_test_docker_app(
        'diag-bundle', constraints=[])
    with dcos_api_session.marathon.deploy_and_cleanup(app, timeout=120):
        bundle = _create_bundle(diagnostics)
        _check_diagnostics_bundle_status(dcos_api_session)
        _download_and_extract_bundle(dcos_api_session, bundle, diagnostics)
        _download_and_extract_bundle_from_another_master(
            dcos_api_session, bundle, diagnostics)
        _delete_bundle(diagnostics, bundle)
def _create_bundle(dcos_api_session):
    """Create a diagnostics bundle and return its name once it exists."""
    last_datapoint = {'time': None, 'value': 0}

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    create_response = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    bundles = diagnostics.get_diagnostics_reports()
    assert len(bundles) == 1, 'bundle file not found'
    assert bundles[0] == create_response['extra']['bundle_name']
    return create_response['extra']['bundle_name']
def _delete_bundle(dcos_api_session, bundle):
    """Delete *bundle* through the health endpoint and verify it is gone."""
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    dcos_api_session.health.post(
        os.path.join('/report/diagnostics/delete', bundle))

    bundles = diagnostics.get_diagnostics_reports()
    assert bundle not in bundles, 'found {} in {}'.format(bundle, bundles)
def _delete_bundle(dcos_api_session, bundle):
    """Remove *bundle* via the legacy delete endpoint and check the listing."""
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    before = diagnostics.get_diagnostics_reports()
    assert bundle in before, 'not found {} in {}'.format(bundle, before)

    dcos_api_session.health.post(
        os.path.join('/report/diagnostics/delete', bundle))

    after = diagnostics.get_diagnostics_reports()
    assert bundle not in after, 'found {} in {}'.format(bundle, after)
def _create_bundle(diagnostics: Diagnostics) -> str:
    """Start a diagnostics job, wait for the report, and return its name."""
    last_datapoint = {'time': None, 'value': 0}

    create_response = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    bundles = diagnostics.get_diagnostics_reports()
    assert len(bundles) > 0, 'bundle file not found'

    # Prefer the 'id' field; fall back to the legacy 'extra.bundle_name'
    # layout when 'id' is absent or empty.
    bundle_name = create_response.get('id')  # type: str
    if not bundle_name:
        bundle_name = create_response['extra']['bundle_name']
    assert bundle_name in bundles
    return bundle_name
def _dump_diagnostics(request, dcos_api_session):
    """Download the zipped diagnostics bundle report from each master in the
    cluster to the home directory. This should be run last. The _ prefix
    makes sure that pytest calls this first out of the autouse session scope
    fixtures, which means that its post-yield code will be executed last.

    * There is no official way to ensure fixtures are called in a certain order
      https://github.com/pytest-dev/pytest/issues/1216
    * However it seems that fixtures at the same scope are called alphabetically
      https://stackoverflow.com/a/28593102/1436300
    """
    yield

    if os.environ.get('DIAGNOSTICS_DIRECTORY') is None:
        log.info('\nNot downloading diagnostics bundle for this session.')
        return

    last_datapoint = {'time': None, 'value': 0}
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    log.info('Create diagnostics report for all nodes')
    diagnostics.start_diagnostics_job()

    log.info('\nWait for diagnostics job to complete')
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)

    log.info('\nWait for diagnostics report to become available')
    diagnostics.wait_for_diagnostics_reports()

    log.info('\nDownload zipped diagnostics reports')
    bundles = diagnostics.get_diagnostics_reports()
    diagnostics.download_diagnostics_reports(diagnostics_bundles=bundles)
def _download_bundle_from_master(dcos_api_session: DcosApiSession,
                                 master_index: Any, bundle: str,
                                 diagnostics: Diagnostics) -> None:
    """ Download DC/OS diagnostics bundle from a master

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    :param diagnostics: DCOS Diagnostics client
    """
    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # Files every node (master or agent) must contribute to the bundle.
    expected_common_files = [
        'df.output',
        'dmesg_-T.output',
        'ip_addr.output',
        'ip_route.output',
        'ps_aux_ww_Z.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output',
        'timedatectl.output',
        'binsh_-c_cat etc*-release.output',
        'systemctl_list-units_dcos*.output',
        'sestatus.output',
        'iptables-save.output',
        'ip6tables-save.output',
        'ipset_list.output',
        'opt/mesosphere/active.buildinfo.full.json',
        'opt/mesosphere/etc/dcos-version.json',
        'opt/mesosphere/etc/expanded.config.json',
        'opt/mesosphere/etc/user.config.yaml',
        'dcos-diagnostics-health.json',
        'var/lib/dcos/cluster-id',
        'proc/cmdline',
        'proc/cpuinfo',
        'proc/meminfo',
        'proc/self/mountinfo',
        'optmesospherebindetect_ip.output',
        'sysctl_-a.output',
    ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output',
        '5050-quota.json',
        '5050-overlay-master_state.json',
        'dcos-mesos-master.service',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid',
        'var/lib/dcos/exhibitor/conf/zoo.cfg',
        'var/lib/dcos/mesos/log/mesos-master.log',
        'var/lib/dcos/mesos/log/mesos-master.log.1',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz',
    ] + expected_common_files

    expected_agent_common_files = [
        '5051-containers.json',
        '5051-containerizer_debug.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log',
        'docker_--version.output',
        'docker_ps.output',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output',
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service',
        "binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d'=' -f2`environ.output",
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile, item: str,
                       to_json: bool = True) -> Any:
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()
        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)
        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str) -> Any:
        # Load the dcos-diagnostics health report; on failure, log archive
        # contents and the diagnostics job log before re-raising.
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())
            # summaryErrorsReport.txt is the diagnostic job log file.
            errors_report = 'summaryErrorsReport.txt'
            try:
                log_data = _read_from_zip(z, errors_report, to_json=False)
                logging.info("{}:\n{}".format(errors_report, log_data))
            except KeyError:
                logging.info("Could not read {}".format(errors_report))
            raise
        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise
        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)
        diagnostics.download_diagnostics_reports(
            [bundle], tmp_dir, dcos_api_session.masters[master_index])

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)
        z = zipfile.ZipFile(bundle_full_location)

        # get a list of all files in a zip archive.
        archived_items = z.namelist()

        # validate error log is empty
        if 'summaryErrorsReport.txt' in archived_items:
            log_data = _read_from_zip(
                z, 'summaryErrorsReport.txt', to_json=False)
            raise AssertionError(
                'summaryErrorsReport.txt must be empty. Got {}'.format(
                    log_data))

        # make sure all required log files for master node are in place.
        for master_ip in dcos_api_session.masters:
            master_folder = master_ip + '_master/'

            # the health report must belong to this host.
            health_report = _get_dcos_diagnostics_health(
                z, master_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == master_ip

            # systemd unit output must be present and free of error messages.
            unit_output = get_file_content(
                master_folder + 'dcos-mesos-master.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(master_folder, archived_items,
                                  expected_master_files)

            state_output = get_file_content(
                master_folder + '5050-master_state.json', z)
            validate_state(state_output)

        # make sure all required log files for agent node are in place.
        for slave_ip in dcos_api_session.slaves:
            agent_folder = slave_ip + '_agent/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == slave_ip

            unit_output = get_file_content(
                agent_folder + 'dcos-mesos-slave.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(agent_folder, archived_items,
                                  expected_agent_files)

        # make sure all required log files for public agent node are in place.
        for public_slave_ip in dcos_api_session.public_slaves:
            agent_public_folder = public_slave_ip + '_agent_public/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_public_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == public_slave_ip

            unit_output = get_file_content(
                agent_public_folder + 'dcos-mesos-slave-public.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(agent_public_folder, archived_items,
                                  expected_public_agent_files)
def _download_bundle_from_master(dcos_api_session, master_index, bundle):
    """ Download DC/OS diagnostics bundle from a master

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    """
    assert len(dcos_api_session.masters) >= master_index + 1, \
        '{} masters required. Got {}'.format(
            master_index + 1, len(dcos_api_session.masters))

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # Files every node (master or agent) must contribute to the bundle.
    expected_common_files = [
        'dmesg_-T.output.gz',
        'ip_addr.output.gz',
        'ip_route.output.gz',
        'ps_aux_ww_Z.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output.gz',
        'timedatectl.output.gz',
        'binsh_-c_cat etc*-release.output.gz',
        'systemctl_list-units_dcos*.output.gz',
        'sestatus.output.gz',
        'iptables-save.output.gz',
        'ip6tables-save.output.gz',
        'ipset_list.output.gz',
        'opt/mesosphere/active.buildinfo.full.json.gz',
        'opt/mesosphere/etc/dcos-version.json.gz',
        'opt/mesosphere/etc/expanded.config.json.gz',
        'opt/mesosphere/etc/user.config.yaml.gz',
        'dcos-diagnostics-health.json',
        'var/lib/dcos/cluster-id.gz',
        'proc/cmdline.gz',
        'proc/cpuinfo.gz',
        'proc/meminfo.gz',
        'proc/self/mountinfo.gz',
        'optmesospherebindetect_ip.output.gz',
        'sysctl_-a.output.gz',
    ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
        '5050-quota.json',
        '5050-overlay-master_state.json.gz',
        'dcos-mesos-master.service.gz',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid.gz',
        'var/lib/dcos/exhibitor/conf/zoo.cfg.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.1.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz.gz',
    ] + expected_common_files

    expected_agent_common_files = [
        '5051-containers.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log.gz',
        'docker_--version.output.gz',
        'docker_ps.output.gz',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile, item: str, to_json=True):
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()
        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)
        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str):
        # Load the dcos-diagnostics health report; on failure, dump archive
        # contents and the diagnostics job logs before re-raising.
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())
            # summaryErrorsReport.txt and summaryReport.txt are diagnostic
            # job log files.
            for log_name in ('summaryErrorsReport.txt', 'summaryReport.txt'):
                try:
                    log_data = _read_from_zip(z, log_name, to_json=False)
                    logging.info("{}:\n{}".format(log_name, log_data))
                except KeyError:
                    logging.info("Could not read {}".format(log_name))
            raise
        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise
        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)

        # Stream the bundle from the selected master to a local file.
        with open(bundle_full_location, 'wb') as f:
            r = dcos_api_session.health.get(
                os.path.join('/report/diagnostics/serve', bundle),
                stream=True,
                node=dcos_api_session.masters[master_index])
            for chunk in r.iter_content(1024):
                f.write(chunk)

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)
        z = zipfile.ZipFile(bundle_full_location)

        # get a list of all files in a zip archive.
        archived_items = z.namelist()

        # validate error log is empty
        if 'summaryErrorsReport.txt' in archived_items:
            log_data = _read_from_zip(
                z, 'summaryErrorsReport.txt', to_json=False)
            raise AssertionError(
                'summaryErrorsReport.txt must be empty. Got {}'.format(
                    log_data))

        # validate all files in zip archive are not empty
        for item in archived_items:
            assert z.getinfo(item).file_size, 'item {} is empty'.format(item)

        # make sure all required log files for master node are in place.
        for master_ip in dcos_api_session.masters:
            master_folder = master_ip + '_master/'

            # the health report must belong to this host.
            health_report = _get_dcos_diagnostics_health(
                z, master_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == master_ip

            # make sure systemd unit output is correct and does not contain
            # error message
            gzipped_unit_output = z.open(
                master_folder + 'dcos-mesos-master.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(master_folder, archived_items,
                                  expected_master_files)

            gzipped_state_output = z.open(
                master_folder + '5050-master_state.json.gz')
            validate_state(gzipped_state_output)

        # make sure all required log files for agent node are in place.
        for slave_ip in dcos_api_session.slaves:
            agent_folder = slave_ip + '_agent/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == slave_ip

            gzipped_unit_output = z.open(
                agent_folder + 'dcos-mesos-slave.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_folder, archived_items,
                                  expected_agent_files)

        # make sure all required log files for public agent node are in place.
        for public_slave_ip in dcos_api_session.public_slaves:
            agent_public_folder = public_slave_ip + '_agent_public/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_public_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == public_slave_ip

            gzipped_unit_output = z.open(
                agent_public_folder + 'dcos-mesos-slave-public.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_public_folder, archived_items,
                                  expected_public_agent_files)
def _download_bundle_from_master(dcos_api_session, master_index, bundle):
    """ Download DC/OS diagnostics bundle from a master

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    """
    master_count = len(dcos_api_session.masters)
    assert master_count >= master_index + 1, \
        '{} masters required. Got {}'.format(master_index + 1, master_count)

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # Files every node (master or agent) must contribute to the bundle.
    expected_common_files = [
        'dmesg_-T.output.gz',
        'ip_addr.output.gz',
        'ip_route.output.gz',
        'ps_aux_ww_Z.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output.gz',
        'timedatectl.output.gz',
        'binsh_-c_cat etc*-release.output.gz',
        'systemctl_list-units_dcos*.output.gz',
        'sestatus.output.gz',
        'iptables-save.output.gz',
        'ip6tables-save.output.gz',
        'ipset_list.output.gz',
        'opt/mesosphere/active.buildinfo.full.json.gz',
        'opt/mesosphere/etc/dcos-version.json.gz',
        'opt/mesosphere/etc/expanded.config.json.gz',
        'opt/mesosphere/etc/user.config.yaml.gz',
        'dcos-diagnostics-health.json',
        'var/lib/dcos/cluster-id.gz',
        'proc/cmdline.gz',
        'proc/cpuinfo.gz',
        'proc/meminfo.gz',
        'proc/self/mountinfo.gz',
        'optmesospherebindetect_ip.output.gz',
        'sysctl_-a.output.gz',
    ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
        '5050-quota.json',
        '5050-overlay-master_state.json.gz',
        'dcos-mesos-master.service.gz',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid.gz',
        'var/lib/dcos/exhibitor/conf/zoo.cfg.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.1.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz.gz',
    ] + expected_common_files

    expected_agent_common_files = [
        '5051-containers.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log.gz',
        'docker_--version.output.gz',
        'docker_ps.output.gz',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile, item: str, to_json=True):
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()
        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)
        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str):
        # try to load dcos-diagnostics health report and validate the report
        # is for this host
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # we did not find a key in archive, let's take a look at items in
            # archive and try to read diagnostics logs.

            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())

            # summaryErrorsReport.txt and summaryReport.txt are diagnostic
            # job log files.
            for log_name in ('summaryErrorsReport.txt', 'summaryReport.txt'):
                try:
                    log_data = _read_from_zip(z, log_name, to_json=False)
                    logging.info("{}:\n{}".format(log_name, log_data))
                except KeyError:
                    logging.info("Could not read {}".format(log_name))
            raise
        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise
        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)

        # Stream the bundle from the requested master to a local file.
        with open(bundle_full_location, 'wb') as f:
            r = dcos_api_session.health.get(
                os.path.join('/report/diagnostics/serve', bundle),
                stream=True,
                node=dcos_api_session.masters[master_index])
            for chunk in r.iter_content(1024):
                f.write(chunk)

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)
        z = zipfile.ZipFile(bundle_full_location)

        # get a list of all files in a zip archive.
        archived_items = z.namelist()

        # validate error log is empty
        if 'summaryErrorsReport.txt' in archived_items:
            log_data = _read_from_zip(
                z, 'summaryErrorsReport.txt', to_json=False)
            raise AssertionError(
                'summaryErrorsReport.txt must be empty. Got {}'.format(
                    log_data))

        # validate all files in zip archive are not empty
        for item in archived_items:
            assert z.getinfo(item).file_size, 'item {} is empty'.format(item)

        # make sure all required log files for master node are in place.
        for master_ip in dcos_api_session.masters:
            master_folder = master_ip + '_master/'

            # the health report must belong to this host.
            health_report = _get_dcos_diagnostics_health(
                z, master_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == master_ip

            # make sure systemd unit output is correct and does not contain
            # error message
            gzipped_unit_output = z.open(
                master_folder + 'dcos-mesos-master.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(master_folder, archived_items,
                                  expected_master_files)

            gzipped_state_output = z.open(
                master_folder + '5050-master_state.json.gz')
            validate_state(gzipped_state_output)

        # make sure all required log files for agent node are in place.
        for slave_ip in dcos_api_session.slaves:
            agent_folder = slave_ip + '_agent/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == slave_ip

            gzipped_unit_output = z.open(
                agent_folder + 'dcos-mesos-slave.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_folder, archived_items,
                                  expected_agent_files)

        # make sure all required log files for public agent node are in place.
        for public_slave_ip in dcos_api_session.public_slaves:
            agent_public_folder = public_slave_ip + '_agent_public/'

            health_report = _get_dcos_diagnostics_health(
                z, agent_public_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == public_slave_ip

            gzipped_unit_output = z.open(
                agent_public_folder + 'dcos-mesos-slave-public.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_public_folder, archived_items,
                                  expected_public_agent_files)