def test_start_diagnostics_job_error(mock_uuid):
    """Starting a diagnostics job must raise HTTPError when the service
    responds with an error status (507 Insufficient Storage here)."""
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        json={
            'code': 507,
            'error': 'could not create bundle f053c58c-b9ce-11e9-8c5b-38d54714bf36 workdir',
        },
        status=507)

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)

    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    # assertRaises via an ad-hoc TestCase instance (file uses pytest-style tests).
    with TestCase().assertRaises(HTTPError):
        check_json(diagnostics.start_diagnostics_job())
def test_start_diagnostics_job(mock_uuid):
    """A successful job-start call returns the bundle metadata as JSON."""
    bundle_metadata = {
        'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        'status': 'Started',
        'started_at': '2019-08-05T11:31:53.238640571Z',
    }
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        json=bundle_metadata)

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)

    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    # The mocked response must round-trip unchanged through the client.
    assert check_json(diagnostics.start_diagnostics_job()) == bundle_metadata
def test_download_reports():
    """Downloaded diagnostics bundles are written verbatim to the target dir."""
    bundle = 'f053c58c-b9ce-11e9-8c5b-38d54714bf36'
    responses.add(
        responses.GET,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36/file',
        content_type='application/zip',
        body='OK')

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)

    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=['leader.mesos'],
        all_slaves=[],
        session=api_session.copy().session,
    )

    with tempfile.TemporaryDirectory() as download_dir:
        diagnostics.download_diagnostics_reports([bundle], download_dir)
        # The file on disk is named after the bundle and holds the raw body.
        with open(os.path.join(download_dir, bundle), 'r') as f:
            assert f.read() == 'OK'
def test_get_reports():
    """get_diagnostics_reports lists bundle ids, skipping deleted entries."""
    responses.add(
        responses.GET,
        'http://leader.mesos/system/health/v1/diagnostics',
        json=[
            {'id': '123e4567-e89b-12d3-a456-426655440000', 'status': 'Started'},
            {'id': '123e4567-e89b-12d3-a456-426655440000', 'status': 'Deleted'},
            {'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36', 'status': 'Done'},
        ])

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    api_session = dcos_api.DcosApiSession(**session_args)

    health_url = api_session.default_url.copy(path='system/health/v1')

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=[],
        all_slaves=[],
        session=api_session.copy().session,
    )

    # The 'Deleted' duplicate of the first id must be filtered out.
    assert diagnostics.get_diagnostics_reports() == [
        '123e4567-e89b-12d3-a456-426655440000',
        'f053c58c-b9ce-11e9-8c5b-38d54714bf36'
    ]
# Beispiel #5 (scrape-artifact separator, commented out so the file parses)
def _delete_bundle(diagnostics: Diagnostics, bundle: str) -> None:
    """Delete *bundle* via the diagnostics client and verify it is gone."""
    before = diagnostics.get_diagnostics_reports()
    assert bundle in before, 'not found {} in {}'.format(bundle, before)

    diagnostics.delete_bundle(bundle)

    after = diagnostics.get_diagnostics_reports()
    assert bundle not in after, 'found {} in {}'.format(bundle, after)
def _create_bundle(dcos_api_session):
    """Start a diagnostics job, wait for it to finish, and return the name
    of the single resulting bundle."""
    # Updated in place by wait_for_diagnostics_job with job progress.
    last_datapoint = {'time': None, 'value': 0}

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    create_response = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    bundles = diagnostics.get_diagnostics_reports()
    assert len(bundles) == 1, 'bundle file not found'
    bundle_name = create_response['extra']['bundle_name']
    assert bundles[0] == bundle_name

    return bundle_name
# Beispiel #7 (scrape-artifact separator, commented out so the file parses)
def _dump_diagnostics(request, dcos_api_session):
    """Download the zipped diagnostics bundle report from each master in the cluster to the home directory. This should
    be run last. The _ prefix makes sure that pytest calls this first out of the autouse session scope fixtures, which
    means that its post-yield code will be executed last.

    * There is no official way to ensure fixtures are called in a certain order
    https://github.com/pytest-dev/pytest/issues/1216
    * However it seems that fixtures at the same scope are called alphabetically
    https://stackoverflow.com/a/28593102/1436300
    """
    yield

    # A report is only produced when DIAGNOSTICS_DIRECTORY is set in the environment.
    make_diagnostics_report = os.environ.get('DIAGNOSTICS_DIRECTORY') is not None
    if make_diagnostics_report:
        creation_start = datetime.datetime.now()
        # Updated in place by wait_for_diagnostics_job with job progress;
        # 'time' holds the timestamp of the last observed datapoint.
        last_datapoint = {
            'time': None,
            'value': 0
        }

        health_url = dcos_api_session.default_url.copy(
            query='cache=0',
            path='system/health/v1',
        )

        diagnostics = Diagnostics(
            default_url=health_url,
            masters=dcos_api_session.masters,
            all_slaves=dcos_api_session.all_slaves,
            session=dcos_api_session.copy().session,
        )

        log.info('Create diagnostics report for all nodes')
        diagnostics.start_diagnostics_job()

        log.info('\nWait for diagnostics job to complete')
        diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)

        duration = last_datapoint['time'] - creation_start
        # Fixed typo in log message: 'Diagnostis' -> 'Diagnostics'.
        log.info('\nDiagnostics bundle took {} to generate'.format(duration))

        log.info('\nWait for diagnostics report to become available')
        diagnostics.wait_for_diagnostics_reports()

        log.info('\nDownload zipped diagnostics reports')
        bundles = diagnostics.get_diagnostics_reports()
        diagnostics.download_diagnostics_reports(diagnostics_bundles=bundles)
    else:
        log.info('\nNot downloading diagnostics bundle for this session.')
# Beispiel #8 (scrape-artifact separator, commented out so the file parses)
def test_dcos_diagnostics_bundle_create_download_delete(
        dcos_api_session: DcosApiSession, use_legacy_api: bool) -> None:
    """
    test bundle create, read, delete workflow
    """
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
        use_legacy_api=use_legacy_api,
    )

    # Run the full workflow while a Marathon app is deployed so the bundle
    # is created against a cluster with a running task.
    app, test_uuid = test_helpers.marathon_test_docker_app(
        'diag-bundle', constraints=[])
    with dcos_api_session.marathon.deploy_and_cleanup(app, timeout=120):
        bundle = _create_bundle(diagnostics)
        _check_diagnostics_bundle_status(dcos_api_session)
        _download_and_extract_bundle(dcos_api_session, bundle, diagnostics)
        _download_and_extract_bundle_from_another_master(
            dcos_api_session, bundle, diagnostics)
        _delete_bundle(diagnostics, bundle)
# Beispiel #9 (scrape-artifact separator, commented out so the file parses)
def _create_bundle(dcos_api_session):
    """Create a diagnostics bundle on the cluster and return its name."""
    # Updated in place by wait_for_diagnostics_job with job progress.
    last_datapoint = {'time': None, 'value': 0}

    diagnostics = Diagnostics(
        default_url=dcos_api_session.default_url.copy(
            query='cache=0',
            path='system/health/v1',
        ),
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    create_response = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    bundles = diagnostics.get_diagnostics_reports()
    assert len(bundles) == 1, 'bundle file not found'
    assert bundles[0] == create_response['extra']['bundle_name']

    return create_response['extra']['bundle_name']
def _delete_bundle(dcos_api_session, bundle):
    """Delete *bundle* through the health API and verify it disappears
    from the report listing."""
    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )
    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    before = diagnostics.get_diagnostics_reports()
    assert bundle in before, 'not found {} in {}'.format(bundle, before)

    # Deletion goes through the health endpoint directly, not the client.
    dcos_api_session.health.post(os.path.join('/report/diagnostics/delete', bundle))

    after = diagnostics.get_diagnostics_reports()
    assert bundle not in after, 'found {} in {}'.format(bundle, after)
# Beispiel #11 (scrape-artifact separator, commented out so the file parses)
def _delete_bundle(dcos_api_session, bundle):
    """Remove a diagnostics bundle via the health API and assert it is no
    longer listed afterwards."""
    diagnostics = Diagnostics(
        default_url=dcos_api_session.default_url.copy(
            query='cache=0',
            path='system/health/v1',
        ),
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    listed = diagnostics.get_diagnostics_reports()
    assert bundle in listed, 'not found {} in {}'.format(bundle, listed)

    dcos_api_session.health.post(os.path.join('/report/diagnostics/delete', bundle))

    listed = diagnostics.get_diagnostics_reports()
    assert bundle not in listed, 'found {} in {}'.format(bundle, listed)
# Beispiel #12 (scrape-artifact separator, commented out so the file parses)
def _create_bundle(diagnostics: Diagnostics) -> str:
    """Start a diagnostics job, wait for completion, and return the bundle
    name reported by the service."""
    # Updated in place by wait_for_diagnostics_job with job progress.
    last_datapoint = {'time': None, 'value': 0}

    create_response = diagnostics.start_diagnostics_job().json()
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)
    diagnostics.wait_for_diagnostics_reports()

    bundles = diagnostics.get_diagnostics_reports()
    assert len(bundles) > 0, 'bundle file not found'

    # Prefer the 'id' field; fall back to 'extra.bundle_name' when 'id'
    # is missing or empty.
    bundle_name = create_response.get('id') or create_response['extra']['bundle_name']  # type: str
    assert bundle_name in bundles

    return bundle_name
# Beispiel #13 (scrape-artifact separator, commented out so the file parses)
def _dump_diagnostics(request, dcos_api_session):
    """Download the zipped diagnostics bundle report from each master in the cluster to the home directory. This should
    be run last. The _ prefix makes sure that pytest calls this first out of the autouse session scope fixtures, which
    means that its post-yield code will be executed last.

    * There is no official way to ensure fixtures are called in a certain order
    https://github.com/pytest-dev/pytest/issues/1216
    * However it seems that fixtures at the same scope are called alphabetically
    https://stackoverflow.com/a/28593102/1436300
    """
    yield

    # Guard clause: only generate a report when the target directory is set.
    if os.environ.get('DIAGNOSTICS_DIRECTORY') is None:
        log.info('\nNot downloading diagnostics bundle for this session.')
        return

    # Updated in place by wait_for_diagnostics_job with job progress.
    last_datapoint = {'time': None, 'value': 0}

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    log.info('Create diagnostics report for all nodes')
    diagnostics.start_diagnostics_job()

    log.info('\nWait for diagnostics job to complete')
    diagnostics.wait_for_diagnostics_job(last_datapoint=last_datapoint)

    log.info('\nWait for diagnostics report to become available')
    diagnostics.wait_for_diagnostics_reports()

    log.info('\nDownload zipped diagnostics reports')
    bundles = diagnostics.get_diagnostics_reports()
    diagnostics.download_diagnostics_reports(diagnostics_bundles=bundles)
# Beispiel #14 (scrape-artifact separator, commented out so the file parses)
def _download_bundle_from_master(dcos_api_session: DcosApiSession,
                                 master_index: Any, bundle: str,
                                 diagnostics: Diagnostics) -> None:
    """ Download DC/OS diagnostics bundle from a master

    Downloads the bundle zip, then validates its contents: the error report
    must be absent, and every master/agent/public-agent folder must contain
    the expected files with a matching health report.

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    :param diagnostics: DCOS Diagnostics client
    """
    # The bundle must already be known to the diagnostics service.
    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # Files expected in the archive for every node type.
    expected_common_files = [
        'df.output',
        'dmesg_-T.output',
        'ip_addr.output',
        'ip_route.output',
        'ps_aux_ww_Z.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output',
        'timedatectl.output',
        'binsh_-c_cat etc*-release.output',
        'systemctl_list-units_dcos*.output',
        'sestatus.output',
        'iptables-save.output',
        'ip6tables-save.output',
        'ipset_list.output',
        'opt/mesosphere/active.buildinfo.full.json',
        'opt/mesosphere/etc/dcos-version.json',
        'opt/mesosphere/etc/expanded.config.json',
        'opt/mesosphere/etc/user.config.yaml',
        'dcos-diagnostics-health.json',
        'var/lib/dcos/cluster-id',
        'proc/cmdline',
        'proc/cpuinfo',
        'proc/meminfo',
        'proc/self/mountinfo',
        'optmesospherebindetect_ip.output',
        'sysctl_-a.output',
    ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output',
        '5050-quota.json',
        '5050-overlay-master_state.json',
        'dcos-mesos-master.service',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid',
        'var/lib/dcos/exhibitor/conf/zoo.cfg',
        'var/lib/dcos/mesos/log/mesos-master.log',
        'var/lib/dcos/mesos/log/mesos-master.log.1',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz',
    ] + expected_common_files

    # files shared by private and public agent hosts
    expected_agent_common_files = [
        '5051-containers.json',
        '5051-containerizer_debug.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log',
        'docker_--version.output',
        'docker_ps.output',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output'
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service',
        "binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d'=' -f2`environ.output"
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile,
                       item: str,
                       to_json: bool = True) -> Any:
        """Return *item* from archive *z*, JSON-decoded unless to_json is False."""
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()

        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)

        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str) -> Any:
        """Load a dcos-diagnostics health report from *z*; on failure, log
        diagnostic context and re-raise."""
        # try to load dcos-diagnostics health report and validate the report is for this host
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # we did not find a key in archive, let's take a look at items in archive and try to read
            # diagnostics logs.

            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())

            # summaryErrorsReport.txt is diagnostic job log files.
            # NOTE(review): the local name 'log' shadows the module-level
            # logger within this scope.
            log = 'summaryErrorsReport.txt'
            try:
                log_data = _read_from_zip(z, log, to_json=False)
                logging.info("{}:\n{}".format(log, log_data))
            except KeyError:
                logging.info("Could not read {}".format(log))
            raise

        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise

        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)
        # Download from the specific master selected by master_index.
        diagnostics.download_diagnostics_reports(
            [bundle], tmp_dir, dcos_api_session.masters[master_index])

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)
        z = zipfile.ZipFile(bundle_full_location)

        # get a list of all files in a zip archive.
        archived_items = z.namelist()

        # validate error log is empty
        if 'summaryErrorsReport.txt' in archived_items:
            log_data = _read_from_zip(z,
                                      'summaryErrorsReport.txt',
                                      to_json=False)
            raise AssertionError(
                'summaryErrorsReport.txt must be empty. Got {}'.format(
                    log_data))

        # make sure all required log files for master node are in place.
        for master_ip in dcos_api_session.masters:
            master_folder = master_ip + '_master/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, master_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == master_ip

            # make sure systemd unit output is correct and does not contain error message
            unit_output = get_file_content(
                master_folder + 'dcos-mesos-master.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(master_folder, archived_items,
                                  expected_master_files)

            state_output = get_file_content(
                master_folder + '5050-master_state.json', z)
            validate_state(state_output)

        # make sure all required log files for agent node are in place.
        for slave_ip in dcos_api_session.slaves:
            agent_folder = slave_ip + '_agent/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, agent_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == slave_ip

            # make sure systemd unit output is correct and does not contain error message
            unit_output = get_file_content(
                agent_folder + 'dcos-mesos-slave.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(agent_folder, archived_items,
                                  expected_agent_files)

        # make sure all required log files for public agent node are in place.
        for public_slave_ip in dcos_api_session.public_slaves:
            agent_public_folder = public_slave_ip + '_agent_public/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, agent_public_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == public_slave_ip

            # make sure systemd unit output is correct and does not contain error message
            unit_output = get_file_content(
                agent_public_folder + 'dcos-mesos-slave-public.service', z)
            verify_unit_response(unit_output, 100)

            verify_archived_items(agent_public_folder, archived_items,
                                  expected_public_agent_files)
# Beispiel #15 (scrape-artifact separator, commented out so the file parses)
def _download_bundle_from_master(dcos_api_session, master_index, bundle):
    """ Download DC/OS diagnostics bundle from a master

    Legacy variant: fetches the bundle via the /report/diagnostics/serve
    endpoint and expects most archived files to be gzip-compressed
    (``.gz`` suffixes), then validates the archive contents per node type.

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    """
    # master_index is 0-based, so the cluster needs at least index+1 masters.
    assert len(dcos_api_session.masters
               ) >= master_index + 1, '{} masters required. Got {}'.format(
                   master_index + 1, len(dcos_api_session.masters))

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # Files expected in the archive for every node type (gzipped variants).
    expected_common_files = [
        'dmesg_-T.output.gz',
        'ip_addr.output.gz',
        'ip_route.output.gz',
        'ps_aux_ww_Z.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output.gz',
        'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output.gz',
        'timedatectl.output.gz',
        'binsh_-c_cat etc*-release.output.gz',
        'systemctl_list-units_dcos*.output.gz',
        'sestatus.output.gz',
        'iptables-save.output.gz',
        'ip6tables-save.output.gz',
        'ipset_list.output.gz',
        'opt/mesosphere/active.buildinfo.full.json.gz',
        'opt/mesosphere/etc/dcos-version.json.gz',
        'opt/mesosphere/etc/expanded.config.json.gz',
        'opt/mesosphere/etc/user.config.yaml.gz',
        'dcos-diagnostics-health.json',
        'var/lib/dcos/cluster-id.gz',
        'proc/cmdline.gz',
        'proc/cpuinfo.gz',
        'proc/meminfo.gz',
        'proc/self/mountinfo.gz',
        'optmesospherebindetect_ip.output.gz',
        'sysctl_-a.output.gz',
    ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
        '5050-quota.json',
        '5050-overlay-master_state.json.gz',
        'dcos-mesos-master.service.gz',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid.gz',
        'var/lib/dcos/exhibitor/conf/zoo.cfg.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.1.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz.gz',
    ] + expected_common_files

    # files shared by private and public agent hosts
    expected_agent_common_files = [
        '5051-containers.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log.gz',
        'docker_--version.output.gz',
        'docker_ps.output.gz',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output.gz'
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d\'=\' -f2`environ.output.gz'
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile, item: str, to_json=True):
        """Return *item* from archive *z*, JSON-decoded unless to_json is False."""
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()

        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)

        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str):
        """Load a dcos-diagnostics health report from *z*; on failure, log
        diagnostic context and re-raise."""
        # try to load dcos-diagnostics health report and validate the report is for this host
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # we did not find a key in archive, let's take a look at items in archive and try to read
            # diagnostics logs.

            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())

            # summaryErrorsReport.txt and summaryReport.txt are diagnostic job log files.
            # NOTE(review): the loop variable 'log' shadows the module-level
            # logger within this scope.
            for log in ('summaryErrorsReport.txt', 'summaryReport.txt'):
                try:
                    log_data = _read_from_zip(z, log, to_json=False)
                    logging.info("{}:\n{}".format(log, log_data))
                except KeyError:
                    logging.info("Could not read {}".format(log))
            raise

        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise

        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)
        # Stream the bundle from the selected master to a local file.
        with open(bundle_full_location, 'wb') as f:
            r = dcos_api_session.health.get(
                os.path.join('/report/diagnostics/serve', bundle),
                stream=True,
                node=dcos_api_session.masters[master_index])

            for chunk in r.iter_content(1024):
                f.write(chunk)

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)
        z = zipfile.ZipFile(bundle_full_location)

        # get a list of all files in a zip archive.
        archived_items = z.namelist()

        # validate error log is empty
        if 'summaryErrorsReport.txt' in archived_items:
            log_data = _read_from_zip(z,
                                      'summaryErrorsReport.txt',
                                      to_json=False)
            raise AssertionError(
                'summaryErrorsReport.txt must be empty. Got {}'.format(
                    log_data))

        # validate all files in zip archive are not empty
        for item in archived_items:
            assert z.getinfo(item).file_size, 'item {} is empty'.format(item)

        # make sure all required log files for master node are in place.
        for master_ip in dcos_api_session.masters:
            master_folder = master_ip + '_master/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, master_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == master_ip

            # make sure systemd unit output is correct and does not contain error message
            gzipped_unit_output = z.open(master_folder +
                                         'dcos-mesos-master.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(master_folder, archived_items,
                                  expected_master_files)

            gzipped_state_output = z.open(master_folder +
                                          '5050-master_state.json.gz')
            validate_state(gzipped_state_output)

        # make sure all required log files for agent node are in place.
        for slave_ip in dcos_api_session.slaves:
            agent_folder = slave_ip + '_agent/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, agent_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == slave_ip

            # make sure systemd unit output is correct and does not contain error message
            gzipped_unit_output = z.open(agent_folder +
                                         'dcos-mesos-slave.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_folder, archived_items,
                                  expected_agent_files)

        # make sure all required log files for public agent node are in place.
        for public_slave_ip in dcos_api_session.public_slaves:
            agent_public_folder = public_slave_ip + '_agent_public/'

            # try to load dcos-diagnostics health report and validate the report is for this host
            health_report = _get_dcos_diagnostics_health(
                z, agent_public_folder + 'dcos-diagnostics-health.json')
            assert 'ip' in health_report
            assert health_report['ip'] == public_slave_ip

            # make sure systemd unit output is correct and does not contain error message
            gzipped_unit_output = z.open(agent_public_folder +
                                         'dcos-mesos-slave-public.service.gz')
            verify_unit_response(gzipped_unit_output, 100)

            verify_archived_items(agent_public_folder, archived_items,
                                  expected_public_agent_files)
# Beispiel #16 (scrape-artifact separator, commented out so the file parses)
def _download_bundle_from_master(dcos_api_session, master_index: int, bundle: str) -> None:
    """ Download DC/OS diagnostics bundle from a master and validate its contents.

    The bundle is streamed into a temporary directory, verified to be a valid
    zip archive, and then inspected: every master, agent and public agent in
    the cluster must have its expected set of non-empty log/state files.

    :param dcos_api_session: dcos_api_session fixture
    :param master_index: master index from dcos_api_session.masters array
    :param bundle: bundle name to download from master
    :raises AssertionError: if the bundle is missing, malformed, contains a
        non-empty summaryErrorsReport.txt, or lacks an expected file.
    """
    assert len(dcos_api_session.masters) >= master_index + 1, '{} masters required. Got {}'.format(
        master_index + 1, len(dcos_api_session.masters))

    health_url = dcos_api_session.default_url.copy(
        query='cache=0',
        path='system/health/v1',
    )

    diagnostics = Diagnostics(
        default_url=health_url,
        masters=dcos_api_session.masters,
        all_slaves=dcos_api_session.all_slaves,
        session=dcos_api_session.copy().session,
    )

    # The requested bundle must be known to the cluster before we download it.
    bundles = diagnostics.get_diagnostics_reports()
    assert bundle in bundles, 'not found {} in {}'.format(bundle, bundles)

    # files expected in the archive for every node, regardless of its role
    expected_common_files = ['dmesg_-T.output.gz',
                             'ip_addr.output.gz',
                             'ip_route.output.gz',
                             'ps_aux_ww_Z.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1vips.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1records.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdefault.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsdns.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1metricsmesos_listener.output.gz',
                             'optmesospherebincurl_-s_-S_http:localhost:62080v1metricslashup.output.gz',
                             'timedatectl.output.gz',
                             'binsh_-c_cat etc*-release.output.gz',
                             'systemctl_list-units_dcos*.output.gz',
                             'sestatus.output.gz',
                             'iptables-save.output.gz',
                             'ip6tables-save.output.gz',
                             'ipset_list.output.gz',
                             'opt/mesosphere/active.buildinfo.full.json.gz',
                             'opt/mesosphere/etc/dcos-version.json.gz',
                             'opt/mesosphere/etc/expanded.config.json.gz',
                             'opt/mesosphere/etc/user.config.yaml.gz',
                             'dcos-diagnostics-health.json',
                             'var/lib/dcos/cluster-id.gz',
                             'proc/cmdline.gz',
                             'proc/cpuinfo.gz',
                             'proc/meminfo.gz',
                             'proc/self/mountinfo.gz',
                             'optmesospherebindetect_ip.output.gz',
                             'sysctl_-a.output.gz',
                             ]

    # these files are expected to be in archive for a master host
    expected_master_files = [
        'binsh_-c_cat proc`systemctl show dcos-mesos-master.service -p MainPID| cut -d\'=\' -f2`environ.output.gz',
        '5050-quota.json',
        '5050-overlay-master_state.json.gz',
        'dcos-mesos-master.service.gz',
        'var/lib/dcos/exhibitor/zookeeper/snapshot/myid.gz',
        'var/lib/dcos/exhibitor/conf/zoo.cfg.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.1.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.2.gz.gz',
        'var/lib/dcos/mesos/log/mesos-master.log.3.gz.gz',
    ] + expected_common_files

    # files shared by private and public agents (on top of the common set)
    expected_agent_common_files = [
        '5051-containers.json',
        '5051-overlay-agent_overlay.json',
        'var/log/mesos/mesos-agent.log.gz',
        'docker_--version.output.gz',
        'docker_ps.output.gz',
    ]

    # for agent host
    expected_agent_files = [
        'dcos-mesos-slave.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave.service -p MainPID| cut -d\'=\' -f2`environ.output.gz'
    ] + expected_agent_common_files + expected_common_files

    # for public agent host
    expected_public_agent_files = [
        'dcos-mesos-slave-public.service.gz',
        'binsh_-c_cat proc`systemctl show dcos-mesos-slave-public.service -p MainPID| cut -d\'=\' -f2`environ.output.gz'
    ] + expected_agent_common_files + expected_common_files

    def _read_from_zip(z: zipfile.ZipFile, item: str, to_json: bool = True):
        """Read `item` from the open archive, optionally deserializing as JSON.

        Raises KeyError if the item is not in the archive, and ValueError if
        `to_json` is set but the content is not valid JSON.
        """
        # raises KeyError if item is not in zipfile.
        item_content = z.read(item).decode()

        if to_json:
            # raises ValueError if cannot deserialize item_content.
            return json.loads(item_content)

        return item_content

    def _get_dcos_diagnostics_health(z: zipfile.ZipFile, item: str):
        """Load a node's dcos-diagnostics health report from the archive.

        On failure, dump the archive contents and any diagnostics job logs to
        aid debugging, then re-raise the original error.
        """
        try:
            _health_report = _read_from_zip(z, item)
        except KeyError:
            # we did not find a key in archive, let's take a look at items in archive and try to read
            # diagnostics logs.

            # namelist() gets a list of all items in a zip archive.
            logging.info(z.namelist())

            # summaryErrorsReport.txt and summaryReport.txt are diagnostic job log files.
            for log in ('summaryErrorsReport.txt', 'summaryReport.txt'):
                try:
                    log_data = _read_from_zip(z, log, to_json=False)
                    logging.info("{}:\n{}".format(log, log_data))
                except KeyError:
                    logging.info("Could not read {}".format(log))
            raise

        except ValueError:
            logging.info("Could not deserialize dcos-diagnostics-health")
            raise

        return _health_report

    with tempfile.TemporaryDirectory() as tmp_dir:
        bundle_full_location = os.path.join(tmp_dir, bundle)
        with open(bundle_full_location, 'wb') as f:
            r = dcos_api_session.health.get(os.path.join('/report/diagnostics/serve', bundle), stream=True,
                                            node=dcos_api_session.masters[master_index])

            # stream the bundle to disk in chunks so large archives do not
            # have to fit into memory.
            for chunk in r.iter_content(1024):
                f.write(chunk)

        # validate bundle zip file.
        assert zipfile.is_zipfile(bundle_full_location)

        # Open the archive with a context manager so the file handle is closed
        # even when a validation assertion below fails (the previous code
        # leaked the handle, which can also break TemporaryDirectory cleanup
        # on platforms that forbid deleting open files).
        with zipfile.ZipFile(bundle_full_location) as z:
            # get a list of all files in a zip archive.
            archived_items = z.namelist()

            # validate error log is empty
            if 'summaryErrorsReport.txt' in archived_items:
                log_data = _read_from_zip(z, 'summaryErrorsReport.txt', to_json=False)
                raise AssertionError('summaryErrorsReport.txt must be empty. Got {}'.format(log_data))

            # validate all files in zip archive are not empty
            for item in archived_items:
                assert z.getinfo(item).file_size, 'item {} is empty'.format(item)

            # make sure all required log files for master node are in place.
            for master_ip in dcos_api_session.masters:
                master_folder = master_ip + '_master/'

                # try to load dcos-diagnostics health report and validate the report is for this host
                health_report = _get_dcos_diagnostics_health(z, master_folder + 'dcos-diagnostics-health.json')
                assert 'ip' in health_report
                assert health_report['ip'] == master_ip

                # make sure systemd unit output is correct and does not contain error message
                gzipped_unit_output = z.open(master_folder + 'dcos-mesos-master.service.gz')
                verify_unit_response(gzipped_unit_output, 100)

                verify_archived_items(master_folder, archived_items, expected_master_files)

                gzipped_state_output = z.open(master_folder + '5050-master_state.json.gz')
                validate_state(gzipped_state_output)

            # make sure all required log files for agent node are in place.
            for slave_ip in dcos_api_session.slaves:
                agent_folder = slave_ip + '_agent/'

                # try to load dcos-diagnostics health report and validate the report is for this host
                health_report = _get_dcos_diagnostics_health(z, agent_folder + 'dcos-diagnostics-health.json')
                assert 'ip' in health_report
                assert health_report['ip'] == slave_ip

                # make sure systemd unit output is correct and does not contain error message
                gzipped_unit_output = z.open(agent_folder + 'dcos-mesos-slave.service.gz')
                verify_unit_response(gzipped_unit_output, 100)

                verify_archived_items(agent_folder, archived_items, expected_agent_files)

            # make sure all required log files for public agent node are in place.
            for public_slave_ip in dcos_api_session.public_slaves:
                agent_public_folder = public_slave_ip + '_agent_public/'

                # try to load dcos-diagnostics health report and validate the report is for this host
                health_report = _get_dcos_diagnostics_health(z, agent_public_folder + 'dcos-diagnostics-health.json')
                assert 'ip' in health_report
                assert health_report['ip'] == public_slave_ip

                # make sure systemd unit output is correct and does not contain error message
                gzipped_unit_output = z.open(agent_public_folder + 'dcos-mesos-slave-public.service.gz')
                verify_unit_response(gzipped_unit_output, 100)

                verify_archived_items(agent_public_folder, archived_items, expected_public_agent_files)