Example #1
def collect_data_and_upload(rc=0):
    """
    All the heavy lifting done here
    Run through "targets" - exactly one: either the host (default) or a single container/image
    """
    # initialize collection targets
    # for now we do either containers OR host -- not both at same time
    if InsightsClient.options.container_mode:
        logger.debug("Client running in container/image mode.")
        logger.debug("Scanning for matching container/image.")
        targets = get_targets()
    else:
        logger.debug("Host selected as scanning target.")
        targets = constants.default_target

    # if there are no targets to scan then bail
    if not targets:
        logger.debug("No targets were found. Exiting.")
        sys.exit(1)

    if InsightsClient.options.offline:
        logger.warning("Assuming remote branch and leaf value of -1")
        pconn = None
        branch_info = constants.default_branch_info
    else:
        pconn = InsightsConnection()

    # TODO: change these error messages to be more meaningful, e.g.
    # "could not determine login information"
    if pconn:
        try:
            branch_info = pconn.branch_info()
        except requests.ConnectionError:
            branch_info = handle_branch_info_error(
                "Could not connect to determine branch information")
        except LookupError:
            branch_info = handle_branch_info_error(
                "Could not determine branch information")
    pc = InsightsConfig(pconn)
    tar_file = None

    if InsightsClient.options.just_upload:
        if not os.path.exists(InsightsClient.options.just_upload):
            logger.error('No file %s', InsightsClient.options.just_upload)
            return 1
        tar_file = InsightsClient.options.just_upload
        rc = _do_upload(pconn, tar_file, 'dummy', 0)
        return rc

    # load config from stdin/file if specified
    try:
        stdin_config = {}
        if InsightsClient.options.from_file:
            with open(InsightsClient.options.from_file, 'r') as f:
                stdin_config = json.load(f)
        elif InsightsClient.options.from_stdin:
            stdin_config = json.load(sys.stdin)
        if ((InsightsClient.options.from_file
             or InsightsClient.options.from_stdin)
                and ('uploader.json' not in stdin_config
                     or 'sig' not in stdin_config)):
            raise ValueError
        if ((InsightsClient.options.from_file
             or InsightsClient.options.from_stdin)
                and 'branch_info' in stdin_config
                and stdin_config['branch_info'] is not None):
            branch_info = stdin_config['branch_info']
    except Exception:
        logger.error('ERROR: Invalid config for %s! Exiting...',
                     ('--from-file'
                      if InsightsClient.options.from_file else '--from-stdin'))
        sys.exit(1)

    start = time.clock()
    collection_rules, rm_conf = pc.get_conf(InsightsClient.options.update,
                                            stdin_config)
    collection_elapsed = (time.clock() - start)
    logger.debug("Rules configuration loaded. Elapsed time: %s",
                 collection_elapsed)

    individual_archives = []

    for t in targets:
        # defaults
        archive = None
        container_connection = None
        mp = None
        obfuscate = None
        # archive metadata
        archive_meta = {}

        try:
            if t['type'] == 'docker_image':
                container_connection = open_image(t['name'])
                logging_name = 'Docker image ' + t['name']
                archive_meta['docker_id'] = t['name']
                archive_meta['display_name'] = docker_display_name(
                    t['name'], t['type'].replace('docker_', ''))
                logger.debug('Docker display_name: %s',
                             archive_meta['display_name'])
                logger.debug('Docker docker_id: %s', archive_meta['docker_id'])
                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis',
                                 logging_name)
                    sys.exit(1)
            elif t['type'] == 'docker_container':
                container_connection = open_container(t['name'])
                logging_name = 'Docker container ' + t['name']
                archive_meta['docker_id'] = t['name']
                archive_meta['display_name'] = docker_display_name(
                    t['name'], t['type'].replace('docker_', ''))
                logger.debug('Docker display_name: %s',
                             archive_meta['display_name'])
                logger.debug('Docker docker_id: %s', archive_meta['docker_id'])
                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis',
                                 logging_name)
                    sys.exit(1)
            elif t['type'] == 'host':
                logging_name = determine_hostname()
                archive_meta['display_name'] = determine_hostname(
                    InsightsClient.options.display_name)
            else:
                logger.error('Unexpected analysis target: %s', t['type'])
                sys.exit(1)

            archive_meta['type'] = t['type'].replace('docker_', '')
            archive_meta['product'] = 'Docker'
            archive_meta['system_id'] = generate_analysis_target_id(
                t['type'], t['name'])

            collection_start = time.clock()
            archive = InsightsArchive(
                compressor=InsightsClient.options.compressor
                if not InsightsClient.options.container_mode else "none",
                target_name=t['name'])
            atexit.register(_delete_archive, archive)
            dc = DataCollector(archive,
                               InsightsClient.config,
                               mountpoint=mp,
                               target_name=t['name'],
                               target_type=t['type'])

            logger.info('Starting to collect Insights data for %s',
                        logging_name)
            dc.run_collection(collection_rules, rm_conf, branch_info)
            elapsed = (time.clock() - start)
            logger.debug("Data collection complete. Elapsed time: %s", elapsed)

            obfuscate = InsightsClient.config.getboolean(APP_NAME, "obfuscate")

            # include rule refresh time in the duration
            collection_duration = (time.clock() -
                                   collection_start) + collection_elapsed

            # add custom metadata about a host if provided by from_file
            # use in the OSE case
            if InsightsClient.options.from_file:
                with open(InsightsClient.options.from_file, 'r') as f:
                    stdin_config = json.load(f)
                    if 'metadata' in stdin_config:
                        archive.add_metadata_to_archive(
                            json.dumps(stdin_config['metadata']),
                            'metadata.json')

            if InsightsClient.options.no_tar_file:
                logger.info('See Insights data in %s', dc.archive.archive_dir)
                return rc

            tar_file = dc.done(collection_rules, rm_conf)

            # add archives to list of individual uploads
            archive_meta['tar_file'] = tar_file
            individual_archives.append(archive_meta)

        finally:
            # called on loop iter end or unexpected exit
            if container_connection:
                container_connection.close()

    # if multiple targets (container mode), add all archives to single archive
    # if InsightsClient.options.container_mode:
    if False:  # we only run single collections now (not the uber archives), bypass this
        full_archive = InsightsArchive(
            compressor=InsightsClient.options.compressor)
        for a in individual_archives:
            shutil.copy(a['tar_file'], full_archive.archive_dir)
        # don't want insights_commands in meta archive
        shutil.rmtree(full_archive.cmd_dir)
        metadata = _create_metadata_json(individual_archives)
        full_archive.add_metadata_to_archive(json.dumps(metadata),
                                             'metadata.json')
        full_tar_file = full_archive.create_tar_file(full_archive=True)
    # if only one target (regular mode), just upload one
    else:
        full_archive = archive
        full_tar_file = tar_file

    if InsightsClient.options.offline or InsightsClient.options.no_upload:
        handle_file_output(full_tar_file, full_archive)
        return rc

    # do the upload
    rc = _do_upload(pconn, full_tar_file, logging_name, collection_duration)

    if InsightsClient.options.keep_archive:
        logger.info('Insights data retained in %s', full_tar_file)
        return rc
    if obfuscate:
        logger.info('Obfuscated Insights data retained in %s',
                    os.path.dirname(full_tar_file))
    full_archive.delete_archive_dir()
    return rc
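Both examples call handle_branch_info_error but do not include it. Below is a minimal sketch of what such a fallback might look like, assuming it returns constants.default_branch_info when the run is allowed to continue without a connection; only the call sites above come from the source, the body is an assumption.

def handle_branch_info_error(msg):
    """Hypothetical fallback for branch-info lookup failures.

    Assumption: when the client may proceed without a connection
    (offline/no-upload), log and fall back to the default branch info;
    otherwise treat the failure as fatal. The real implementation may
    differ.
    """
    if InsightsClient.options.offline or InsightsClient.options.no_upload:
        logger.warning(msg)
        logger.warning("Assuming remote branch and leaf value of -1")
        return constants.default_branch_info
    else:
        logger.error("ERROR: %s", msg)
        sys.exit(1)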
Example #2
def collect_data_and_upload(rc=0):
    """
    All the heavy lifting done here
    Run through "targets" - could be just one (host, default) or many (containers+host)
    """
    # initialize collection targets
    # for now we do either containers OR host -- not both at same time
    if InsightsClient.options.container_mode:
        targets = get_targets()
        targets = targets + constants.default_target
    else:
        targets = constants.default_target

    if InsightsClient.options.offline:
        logger.warning("Assuming remote branch and leaf value of -1")
        pconn = None
        branch_info = constants.default_branch_info
    else:
        pconn = InsightsConnection()

    # TODO: change these error messages to be more meaningful, e.g.
    # "could not determine login information"
    if pconn:
        try:
            branch_info = pconn.branch_info()
        except requests.ConnectionError:
            branch_info = handle_branch_info_error(
                "Could not connect to determine branch information")
        except LookupError:
            branch_info = handle_branch_info_error(
                "Could not determine branch information")
    pc = InsightsConfig(pconn)

    if InsightsClient.options.just_upload:
        if not os.path.exists(InsightsClient.options.just_upload):
            logger.error('No file %s', InsightsClient.options.just_upload)
            return 1
        tar_file = InsightsClient.options.just_upload
        rc = _do_upload(pconn, tar_file, 'dummy', 0)
        return rc

    # load config from stdin/file if specified
    try:
        stdin_config = {}
        if InsightsClient.options.from_file:
            with open(InsightsClient.options.from_file, 'r') as f:
                stdin_config = json.load(f)
        elif InsightsClient.options.from_stdin:
            stdin_config = json.load(sys.stdin)
        if ((InsightsClient.options.from_file or InsightsClient.options.from_stdin) and
            ('uploader.json' not in stdin_config or
             'sig' not in stdin_config)):
            raise ValueError
    except Exception:
        logger.error('ERROR: Invalid config for %s! Exiting...',
                     ('--from-file' if InsightsClient.options.from_file else '--from-stdin'))
        sys.exit(1)

    start = time.clock()
    collection_rules, rm_conf = pc.get_conf(InsightsClient.options.update, stdin_config)
    collection_elapsed = (time.clock() - start)
    logger.debug("Rules configuration loaded. Elapsed time: %s", collection_elapsed)

    individual_archives = []

    for t in targets:
        # defaults
        archive = None
        container_connection = None
        mp = None
        obfuscate = None
        # archive metadata
        archive_meta = {}

        try:
            if t['type'] == 'docker_image':
                container_connection = open_image(t['name'])
                logging_name = 'Docker image ' + t['name']
                archive_meta['display_name'] = get_repotag(t['name'])
                archive_meta['docker_id'] = t['name']
                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis', logging_name)
                    continue
            elif t['type'] == 'docker_container':
                container_connection = open_container(t['name'])
                logging_name = 'Docker container ' + t['name']
                archive_meta['display_name'] = t['name']
                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis', logging_name)
                    continue
            elif t['type'] == 'host':
                logging_name = determine_hostname()
                archive_meta['display_name'] = determine_hostname(InsightsClient.options.display_name)
            else:
                logger.error('Unexpected analysis target: %s', t['type'])
                continue

            archive_meta['type'] = t['type'].replace('docker_', '')
            archive_meta['product'] = 'Docker'
            archive_meta['system_id'] = generate_analysis_target_id(t['type'], t['name'])

            collection_start = time.clock()
            archive = InsightsArchive(compressor=InsightsClient.options.compressor if not InsightsClient.options.container_mode else "none",
                                      target_name=t['name'])
            atexit.register(_delete_archive, archive)
            dc = DataCollector(archive,
                               mountpoint=mp,
                               target_name=t['name'],
                               target_type=t['type'])

            logger.info('Starting to collect Insights data for %s', logging_name)
            dc.run_collection(collection_rules, rm_conf, branch_info)
            elapsed = (time.clock() - start)
            logger.debug("Data collection complete. Elapsed time: %s", elapsed)

            obfuscate = InsightsClient.config.getboolean(APP_NAME, "obfuscate")

            # include rule refresh time in the duration
            collection_duration = (time.clock() - collection_start) + collection_elapsed

            if InsightsClient.options.no_tar_file:
                logger.info('See Insights data in %s', dc.archive.archive_dir)
                return rc

            tar_file = dc.done(collection_rules, rm_conf)

            # add archives to list of individual uploads
            archive_meta['tar_file'] = tar_file
            individual_archives.append(archive_meta)

        finally:
            # called on loop iter end or unexpected exit
            if container_connection:
                container_connection.close()

    # if multiple targets (container mode), add all archives to single archive
    if InsightsClient.options.container_mode:
        full_archive = InsightsArchive(compressor=InsightsClient.options.compressor)
        for a in individual_archives:
            shutil.copy(a['tar_file'], full_archive.archive_dir)
        # don't want insights_commands in meta archive
        shutil.rmtree(full_archive.cmd_dir)
        metadata = _create_metadata_json(individual_archives)
        full_archive.add_metadata_to_archive(json.dumps(metadata), 'metadata.json')
        full_tar_file = full_archive.create_tar_file(full_archive=True)
    # if only one target (regular mode), just upload one
    else:
        full_archive = archive
        full_tar_file = tar_file

    if InsightsClient.options.offline or InsightsClient.options.no_upload:
        handle_file_output(full_tar_file, full_archive)
        return rc

    # do the upload
    rc = _do_upload(pconn, full_tar_file, logging_name, collection_duration)

    if InsightsClient.options.keep_archive:
        logger.info('Insights data retained in %s', full_tar_file)
        return rc
    if obfuscate:
        logger.info('Obfuscated Insights data retained in %s',
                    os.path.dirname(full_tar_file))
    full_archive.delete_archive_dir()
    return rc
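The container-mode merge in Example #2 relies on _create_metadata_json(individual_archives) to build the metadata.json for the combined archive, but that helper is not shown. A minimal sketch, assuming it simply aggregates the per-target archive_meta dicts built in the collection loop; the top-level layout here is an assumption, only the per-target keys come from the examples.

def _create_metadata_json(archives):
    """Hypothetical sketch of the container-mode metadata builder."""
    systems = []
    for a in archives:
        # keep the identifying keys set in the collection loop above
        system = {k: a[k] for k in ('type', 'system_id', 'display_name')
                  if k in a}
        if 'docker_id' in a:
            system['docker_id'] = a['docker_id']
        systems.append(system)
    return {'product': 'Docker', 'systems': systems}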
Example #3
def collect(rc=0):
    """
    All the heavy lifting done here
    Run through "targets" - exactly one: either the host (default) or a single container/image
    """
    # initialize collection targets
    # tar files
    if config['analyze_file'] is not None:
        logger.debug("Client analyzing a compress filesystem.")
        targets = [{
            'type': 'compressed_file',
            'name': os.path.splitext(os.path.basename(config['analyze_file']))[0],
            'location': config['analyze_file']
        }]

    # mountpoints
    elif config['analyze_mountpoint'] is not None:
        logger.debug("Client analyzing a filesystem already mounted.")
        targets = [{
            'type': 'mountpoint',
            'name': os.path.splitext(os.path.basename(config['analyze_mountpoint']))[0],
            'location': config['analyze_mountpoint']
        }]

    # container mode
    elif config['container_mode']:
        logger.debug("Client running in container/image mode.")
        logger.debug("Scanning for matching container/image.")

        from containers import get_targets
        targets = get_targets()

    # the host
    else:
        logger.debug("Host selected as scanning target.")
        targets = constants.default_target

    # if there are no targets to scan then bail
    if not targets:
        logger.debug("No targets were found. Exiting.")
        return False
    logger.debug("Found targets: ")
    logger.debug(targets)

    branch_info = get_branch_info()
    pc = InsightsConfig()
    tar_file = None

    # load config from stdin/file if specified
    try:
        stdin_config = {}
        if config['from_file']:
            with open(config['from_file'], 'r') as f:
                stdin_config = json.load(f)
        elif config['from_stdin']:
            stdin_config = json.load(sys.stdin)
        if ((config['from_file'] or config['from_stdin'])
                and ('uploader.json' not in stdin_config
                     or 'sig' not in stdin_config)):
            raise ValueError
        if ((config['from_file'] or config['from_stdin'])
                and 'branch_info' in stdin_config
                and stdin_config['branch_info'] is not None):
            branch_info = stdin_config['branch_info']
    except Exception:
        logger.error(
            'ERROR: Invalid config for %s! Exiting...',
            ('--from-file' if config['from_file'] else '--from-stdin'))
        return False

    collection_rules, rm_conf = pc.get_conf(False, stdin_config)
    individual_archives = []

    for t in targets:
        # defaults
        archive = None
        container_connection = None
        mp = None
        compressed_filesystem = None
        # archive metadata
        archive_meta = {}

        try:

            # analyze docker images
            if t['type'] == 'docker_image':

                from containers import open_image
                container_connection = open_image(t['name'])
                logging_name = 'Docker image ' + t['name']
                archive_meta['docker_id'] = t['name']

                from containers import docker_display_name
                archive_meta['display_name'] = docker_display_name(
                    t['name'], t['type'].replace('docker_', ''))

                logger.debug('Docker display_name: %s',
                             archive_meta['display_name'])
                logger.debug('Docker docker_id: %s', archive_meta['docker_id'])

                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis',
                                 logging_name)
                    return False

            # analyze docker containers
            elif t['type'] == 'docker_container':
                from containers import open_container
                container_connection = open_container(t['name'])

                logging_name = 'Docker container ' + t['name']
                archive_meta['docker_id'] = t['name']

                from containers import docker_display_name
                archive_meta['display_name'] = docker_display_name(
                    t['name'], t['type'].replace('docker_', ''))
                logger.debug('Docker display_name: %s',
                             archive_meta['display_name'])
                logger.debug('Docker docker_id: %s', archive_meta['docker_id'])

                if container_connection:
                    mp = container_connection.get_fs()
                else:
                    logger.error('Could not open %s for analysis',
                                 logging_name)
                    return False

            # analyze compressed files
            elif t['type'] == 'compressed_file':

                logging_name = ('Compressed file ' + t['name'] +
                                ' at location ' + t['location'])

                from compressed_file import InsightsCompressedFile
                compressed_filesystem = InsightsCompressedFile(t['location'])

                if not compressed_filesystem.is_tarfile:
                    logger.debug("Could not access compressed tar filesystem.")
                    return False

                mp = compressed_filesystem.get_filesystem_path()

            # analyze mountpoints
            elif t['type'] == 'mountpoint':

                logging_name = ('Filesystem ' + t['name'] +
                                ' at location ' + t['location'])
                mp = config['analyze_mountpoint']

            # analyze the host
            elif t['type'] == 'host':
                logging_name = determine_hostname()
                archive_meta['display_name'] = determine_hostname(
                    config['display_name'])

            # nothing found to analyze
            else:
                logger.error('Unexpected analysis target: %s', t['type'])
                return False

            archive_meta['type'] = t['type'].replace('docker_', '')
            archive_meta['product'] = 'Docker'

            machine_id = generate_analysis_target_id(t['type'], t['name'])
            archive_meta['system_id'] = machine_id
            archive_meta['machine_id'] = machine_id

            archive = InsightsArchive(
                compressor=config['compressor'] if not config['container_mode'] else "none",
                target_name=t['name'])

            # determine the target type and begin collection
            # we infer "docker_image" SPEC analysis for certain types
            if t['type'] in ["mountpoint", "compressed_file"]:
                target_type = "docker_image"
            else:
                target_type = t['type']
            logger.debug("Inferring target_type '%s' for SPEC collection",
                         target_type)
            logger.debug("Inferred from '%s'", t['type'])
            dc = DataCollector(archive,
                               config,
                               mountpoint=mp,
                               target_name=t['name'],
                               target_type=target_type)

            logger.info('Starting to collect Insights data for %s',
                        logging_name)
            dc.run_collection(collection_rules, rm_conf, branch_info)

            # add custom metadata about a host if provided by from_file
            # use in the OSE case
            if config['from_file']:
                with open(config['from_file'], 'r') as f:
                    stdin_config = json.load(f)
                    if 'metadata' in stdin_config:
                        archive.add_metadata_to_archive(
                            json.dumps(stdin_config['metadata']),
                            'metadata.json')

            tar_file = dc.done(collection_rules, rm_conf)

            # add archives to list of individual uploads
            archive_meta['tar_file'] = tar_file
            individual_archives.append(archive_meta)

        finally:
            # called on loop iter end or unexpected exit
            if container_connection:
                container_connection.close()

    # cleanup the temporary stuff for analyzing tar files
    if config['analyze_file'] is not None and compressed_filesystem is not None:
        compressed_filesystem.cleanup_temp_filesystem()

    return tar_file
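All three examples store generate_analysis_target_id(t['type'], t['name']) as the target's system_id (and machine_id in Example #3), but the helper itself is not shown. A minimal sketch, assuming a deterministic ID derived from the target type and name; the signature matches the call sites above, the UUID scheme itself is an assumption.

import uuid

def generate_analysis_target_id(target_type, target_name):
    """Hypothetical sketch: derive a stable ID for an analysis target.

    A deterministic UUID over type and name means repeated scans of the
    same container/image map to the same system_id; the real client may
    compute this differently.
    """
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, target_type + '-' + target_name))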