Example #1
File: list.py  Project: scrapinghub/shub
def _extract_metadata_from_image_info_output(output):
    """Extract and validate spiders list from `shub-image-info` output."""
    def raise_shub_image_info_error(error):
        """Helper to raise ShubException with prefix and output"""
        msg = "shub-image-info: {} \n[output '{}']".format(error, output)
        raise ShubException(msg)

    try:
        metadata = json.loads(output)
        project_type = metadata.get('project_type')
    except (AttributeError, ValueError):
        raise_shub_image_info_error('output is not a valid JSON dict')
    if not isinstance(project_type, string_types):
        raise_shub_image_info_error(
            '"project_type" key is required and must be a string')

    spiders_list = metadata.get('spiders')
    if not isinstance(spiders_list, list):
        raise_shub_image_info_error(
            '"spiders" key is required and must be a list')
    spiders, scripts = [], []
    for name in spiders_list:
        if not (name and isinstance(name, string_types)):
            raise_shub_image_info_error(
                "spider name can't be empty or non-string")
        if project_type == 'scrapy' and name.startswith('py:'):
            scripts.append(name[3:])
        else:
            spiders.append(name)
    return {
        'project_type': project_type,
        'spiders': utils.valid_spiders(spiders),
        'scripts': utils.valid_spiders(scripts),
    }
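
For illustration, here is a minimal usage sketch of the function above. The JSON payload is a made-up example of `shub-image-info` output, and `utils.valid_spiders` is assumed to filter and normalize the reported names:

# Hypothetical shub-image-info output for a scrapy project; entries with a
# 'py:' prefix are classified as scripts rather than spiders.
sample_output = json.dumps({
    'project_type': 'scrapy',
    'spiders': ['toscrape-css', 'toscrape-xpath', 'py:run_me.py'],
})
metadata = _extract_metadata_from_image_info_output(sample_output)
# Expected shape (exact values depend on utils.valid_spiders):
# {'project_type': 'scrapy',
#  'spiders': ['toscrape-css', 'toscrape-xpath'],
#  'scripts': ['run_me.py']}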
Example #2
File: list.py  Project: scrapinghub/shub
def list_cmd(image_name, project, endpoint, apikey):
    """Short version of list cmd to use with deploy cmd."""
    settings = _get_project_settings(project, endpoint, apikey)
    environment = {'JOB_SETTINGS': json.dumps(settings)}
    exit_code, logs = _run_cmd_in_docker_container(image_name,
                                                   'shub-image-info',
                                                   environment)
    if exit_code == 0:
        return _extract_metadata_from_image_info_output(logs)
    # shub-image-info command not found, fallback to list-spiders
    elif exit_code == 127:
        # FIXME we should pass some value for SCRAPY_PROJECT_ID anyway
        # to handle `scrapy list` cmd properly via sh_scrapy entrypoint
        # environment['SCRAPY_PROJECT_ID'] = str(project) if project else ''
        exit_code, logs = _run_cmd_in_docker_container(image_name,
                                                       'list-spiders',
                                                       environment)
        if exit_code != 0:
            click.echo(logs)
            raise ShubException('Container with list cmd exited with code %s' %
                                exit_code)
        return {
            'project_type': 'scrapy',
            'spiders': utils.valid_spiders(logs.splitlines()),
        }
    else:
        click.echo(logs)
        raise ShubException(
            'Container with shub-image-info cmd exited with code %s' %
            exit_code)
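
The helper `_run_cmd_in_docker_container` used above is not shown in these examples. As a rough sketch only, assuming the docker SDK for Python (this is an illustration, not shub's actual implementation), it could look like:

import docker

def _run_cmd_in_docker_container(image_name, command, environment):
    """Run `command` in a throwaway container and return (exit_code, logs)."""
    client = docker.from_env()
    container = client.containers.create(
        image_name, command=command, environment=environment)
    try:
        container.start()
        result = container.wait()  # blocks until the command exits
        logs = container.logs(stdout=True, stderr=True).decode('utf-8')
        return result['StatusCode'], logs
    finally:
        container.remove(force=True)

An exit status of 127 conventionally means "command not found", which is why `list_cmd` treats it as the signal that the image predates `shub-image-info` and falls back to the older `list-spiders` entrypoint.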