Example 1
def check_mains_libs(data, **kwargs):
    mains = data.get("mains", [])
    libs = data.get("libs", [])
    job_type, subtype = edp.split_job_type(data.get("type"))
    streaming = job_type == "MapReduce" and subtype == "Streaming"

    # Pig or Hive flow has to contain script in mains, may also use libs
    if job_type in ['Pig', 'Hive']:
        if not mains:
            raise e.InvalidDataException("%s flow requires main script" %
                                         data.get("type"))
        # Check for overlap
        if set(mains).intersection(set(libs)):
            raise e.InvalidDataException("'mains' and 'libs' overlap")

    else:
        if not streaming and not libs:
            raise e.InvalidDataException("%s flow requires libs" %
                                         data.get("type"))
        if mains:
            raise e.InvalidDataException("%s flow does not use mains" %
                                         data.get("type"))

    # Make sure that all referenced binaries exist
    _check_binaries(mains)
    _check_binaries(libs)
Example 2
def _check_swift_data_source_create(data):
    if len(data['url']) == 0:
        raise ex.InvalidDataException(_("Swift url must not be empty"))
    url = urlparse.urlparse(data['url'])
    if url.scheme != "swift":
        raise ex.InvalidDataException(_("URL scheme must be 'swift'"))

    # The swift url suffix does not have to be included in the netloc.
    # However, if the swift suffix indicator is part of the netloc then
    # we require the right suffix.
    # Additionally, the path must be more than '/'
    if (su.SWIFT_URL_SUFFIX_START in url.netloc and not url.netloc.endswith(
            su.SWIFT_URL_SUFFIX)) or len(url.path) <= 1:
        raise ex.InvalidDataException(
            _("URL must be of the form swift://container%s/object") %
            su.SWIFT_URL_SUFFIX)

    if not CONF.use_domain_for_proxy_users and "credentials" not in data:
        raise ex.InvalidCredentials(_("No credentials provided for Swift"))
    if not CONF.use_domain_for_proxy_users and ("user"
                                                not in data["credentials"]):
        raise ex.InvalidCredentials(
            _("User is not provided in credentials for Swift"))
    if not CONF.use_domain_for_proxy_users and ("password"
                                                not in data["credentials"]):
        raise ex.InvalidCredentials(
            _("Password is not provided in credentials for Swift"))
Example 3
def check_shares(data):
    if not data:
        return

    paths = (share.get('path') for share in data)
    paths = [path for path in paths if path is not None]
    if len(paths) != len(set(paths)):
        raise ex.InvalidDataException(
            _('Multiple shares cannot be mounted to the same path.'))

    for path in paths:
        if not path.startswith('/') or '\x00' in path:
            raise ex.InvalidDataException(
                _('Paths must be absolute Linux paths starting with "/" '
                  'and may not contain nulls.'))

    client = manila.client()
    for share in data:
        manila_share = manila.get_share(client, share['id'])
        if not manila_share:
            raise ex.InvalidReferenceException(
                _("Requested share id %s does not exist.") % share['id'])

        share_type = manila_share.share_proto
        if share_type not in shares.SUPPORTED_SHARE_TYPES:
            raise ex.InvalidReferenceException(
                _("Requested share id %(id)s is of type %(type)s, which is "
                  "not supported by Sahara.") % {
                      "id": share['id'],
                      "type": share_type
                  })
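A hedged sketch of a payload that satisfies the structural checks above; the ids are placeholders and must refer to existing manila shares of a supported protocol for the reference checks to pass:

shares_payload = [
    {"id": "6db2b999-1111-4444-8888-aaaaaaaaaaaa", "path": "/mnt/share1"},
    {"id": "7ec3caaa-2222-5555-9999-bbbbbbbbbbbb", "path": "/mnt/share2"},
]
check_shares(shares_payload)  # unique absolute paths, so only the manila lookups can fail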
Example 4
def check_mains_libs(data, **kwargs):
    mains = data.get("mains", [])
    libs = data.get("libs", [])
    job_type, subtype = edp.split_job_type(data.get("type"))
    streaming = (job_type == edp.JOB_TYPE_MAPREDUCE and
                 subtype == edp.JOB_SUBTYPE_STREAMING)

    # Pig or Hive flow has to contain script in mains, may also use libs
    if job_type in [edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE]:
        if not mains:
            raise e.InvalidDataException("%s flow requires main script" %
                                         data.get("type"))
        # Check for overlap
        if set(mains).intersection(set(libs)):
            raise e.InvalidDataException("'mains' and 'libs' overlap")

    else:
        if not streaming and not libs:
            raise e.InvalidDataException("%s flow requires libs" %
                                         data.get("type"))
        if mains:
            raise e.InvalidDataException("%s flow does not use mains" %
                                         data.get("type"))

    # Make sure that all referenced binaries exist
    _check_binaries(mains)
    _check_binaries(libs)
Example 5
def check_job_executor(data, job_id):
    job = api.get_job(job_id)
    job_type, subtype = edp.split_job_type(job.type)

    # Check if cluster contains Oozie service to run job
    main_base.check_edp_job_support(data['cluster_id'])

    # All types except Java require input and output objects
    if job_type == 'Java':
        if not _is_main_class_present(data):
            raise ex.InvalidDataException('Java job must '
                                          'specify edp.java.main_class')
    else:
        if not ('input_id' in data and 'output_id' in data):
            raise ex.InvalidDataException("%s job requires 'input_id' "
                                          "and 'output_id'" % job.type)

        b.check_data_source_exists(data['input_id'])
        b.check_data_source_exists(data['output_id'])

        b.check_data_sources_are_different(data['input_id'], data['output_id'])

        if job_type == 'MapReduce' and (
                subtype == 'Streaming' and not _streaming_present(data)):
            raise ex.InvalidDataException("%s job "
                                          "must specify streaming mapper "
                                          "and reducer" % job.type)

    main_base.check_cluster_exists(data['cluster_id'])
Example 6
    def _validate_url(self, url):
        if len(url) == 0:
            raise ex.InvalidDataException(_("MapR FS url must not be empty"))
        url = urlparse.urlparse(url)
        if url.scheme:
            if url.scheme != "maprfs":
                raise ex.InvalidDataException(_("URL scheme must be 'maprfs'"))
Example 7
def _check_maprfs_data_source_create(data):
    if len(data['url']) == 0:
        raise ex.InvalidDataException(_("MapR FS url must not be empty"))
    url = urlparse.urlparse(data['url'])
    if url.scheme:
        if url.scheme != "maprfs":
            raise ex.InvalidDataException(_("URL scheme must be 'maprfs'"))
Example 8
def check_mains_libs(data, **kwargs):
    mains = data.get("mains", [])
    libs = data.get("libs", [])
    job_type, subtype = edp.split_job_type(data.get("type"))
    streaming = (job_type == edp.JOB_TYPE_MAPREDUCE
                 and subtype == edp.JOB_SUBTYPE_STREAMING)

    # These types must have a value in mains and may also use libs
    if job_type in [edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE, edp.JOB_TYPE_SPARK]:
        if not mains:
            if job_type == edp.JOB_TYPE_SPARK:
                msg = _("%s job requires main application jar") % data.get(
                    "type")
            else:
                msg = _("%s flow requires main script") % data.get("type")
            raise e.InvalidDataException(msg)
        # Check for overlap
        if set(mains).intersection(set(libs)):
            raise e.InvalidDataException(_("'mains' and 'libs' overlap"))

    else:
        # Java and MapReduce require libs, but MapReduce.Streaming does not
        if not streaming and not libs:
            raise e.InvalidDataException(
                _("%s flow requires libs") % data.get("type"))
        if mains:
            raise e.InvalidDataException(
                _("%s flow does not use mains") % data.get("type"))

    # Make sure that all referenced binaries exist
    _check_binaries(mains)
    _check_binaries(libs)
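A hedged illustration of how the rules above apply; the values in 'mains' and 'libs' are placeholders for the job-binary references that _check_binaries would resolve:

# Pig needs a main script; libs are optional.
check_mains_libs({"type": "Pig", "mains": ["<pig-script-binary-id>"], "libs": []})

# Java (non-streaming) requires libs, so this raises InvalidDataException.
check_mains_libs({"type": "Java", "mains": [], "libs": []})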
Example 9
    def _validate_url(self, url):
        if len(url) == 0:
            raise ex.InvalidDataException(_("HDFS url must not be empty"))
        url = urlparse.urlparse(url)
        if url.scheme:
            if url.scheme != "hdfs":
                raise ex.InvalidDataException(_("URL scheme must be 'hdfs'"))
            if not url.hostname:
                raise ex.InvalidDataException(
                    _("HDFS url is incorrect, cannot determine a hostname"))
Example 10
    def _validate_labels_update(self, default_data, update_values):
        for label in update_values.keys():
            if label not in default_data.keys():
                raise ex.InvalidDataException(
                    _("Label '%s' can't be updated because it's not "
                      "available for plugin or its version") % label)
            if not default_data[label][MUTABLE]:
                raise ex.InvalidDataException(
                    _("Label '%s' can't be updated because it's not "
                      "mutable") % label)
Example 11
    def get_service(self, node_process):
        ui_name = self.get_service_name_by_node_process(node_process)
        if ui_name is None:
            raise e.InvalidDataException(
                _('Service not found in services list'))
        version = self.get_chosen_service_version(ui_name)
        service = self._find_service_instance(ui_name, version)
        if service is None:
            raise e.InvalidDataException(_('Can not map service'))
        return service
Example 12
def _check_manila_data_source_create(data):
    if len(data['url']) == 0:
        raise ex.InvalidDataException(_("Manila url must not be empty"))
    url = urlparse.urlparse(data['url'])
    if url.scheme != "manila":
        raise ex.InvalidDataException(_("Manila url scheme must be 'manila'"))
    if not uuidutils.is_uuid_like(url.netloc):
        raise ex.InvalidDataException(_("Manila url netloc must be a uuid"))
    if not url.path:
        raise ex.InvalidDataException(_("Manila url path must not be empty"))
Example 13
def _check_hdfs_data_source_create(data):
    if len(data['url']) == 0:
        raise ex.InvalidDataException(_("HDFS url must not be empty"))
    url = urlparse.urlparse(data['url'])
    if url.scheme:
        if url.scheme != "hdfs":
            raise ex.InvalidDataException(_("URL scheme must be 'hdfs'"))
        if not url.hostname:
            raise ex.InvalidDataException(
                _("HDFS url is incorrect, cannot determine a hostname"))
Example 14
    def _validate_url(self, url):
        if len(url) == 0:
            raise ex.InvalidDataException(
                _("Internal data base url must not be empty"))
        url = urlparse.urlparse(url)
        if url.scheme != "internal-db":
            raise ex.InvalidDataException(
                _("URL scheme must be 'internal-db'"))
        if not uuidutils.is_uuid_like(url.netloc):
            raise ex.InvalidDataException(
                _("Internal data base url netloc must be a uuid"))
Example 15
    def _validate_url(self, url):
        if len(url) == 0:
            raise ex.InvalidDataException(_("Manila url must not be empty"))
        url = urlparse.urlparse(url)
        if url.scheme != "manila":
            raise ex.InvalidDataException(
                _("Manila url scheme must be 'manila'"))
        if not uuidutils.is_uuid_like(url.netloc):
            raise ex.InvalidDataException(
                _("Manila url netloc must be a uuid"))
        if not url.path:
            raise ex.InvalidDataException(
                _("Manila url path must not be empty"))
Example 16
    def _validate_url(self, url):
        if len(url) == 0:
            raise ex.InvalidDataException(_("S3 url must not be empty"))

        url = urlparse.urlparse(url)
        if url.scheme not in ["s3", "s3a"]:
            raise ex.InvalidDataException(
                _("URL scheme must be 's3' or 's3a'"))

        if not url.hostname:
            raise ex.InvalidDataException(_("Bucket name must be present"))

        if not url.path:
            raise ex.InvalidDataException(_("Object name must be present"))
Example 17
def _check_datasource_placeholder(url):
    if url is None:
        return
    total_length = 0
    substrings = re.findall(r"%RANDSTR\(([\-]?\d+)\)%", url)
    for length in map(int, substrings):
        if length <= 0:
            raise ex.InvalidDataException(_("Requested RANDSTR length"
                                            " must be positive."))
        total_length += length

    if total_length > 1024:
        raise ex.InvalidDataException(_("Requested RANDSTR length is"
                                        " too long, please choose a "
                                        "value less than 1024."))
Example 18
    def validate_job_execution(self, cluster, job, data):
        if not self.edp_supported(cluster.hadoop_version):
            raise ex.InvalidDataException(
                _('Spark {base} or higher required to run {type} jobs').format(
                    base=EdpEngine.edp_base_version, type=job.type))

        super(EdpEngine, self).validate_job_execution(cluster, job, data)
Example 19
    def _create_config_obj(self,
                           item,
                           target='general',
                           scope='cluster',
                           high_priority=False):
        def _prepare_value(value):
            if isinstance(value, str):
                return value.strip().lower()
            return value

        conf_name = _prepare_value(item.get('name', None))

        conf_value = _prepare_value(item.get('value', None))

        if not conf_name:
            raise ex.HadoopProvisionError(_("Config missing 'name'"))

        if conf_value is None:
            raise e.InvalidDataException(
                _("Config '%s' missing 'value'") % conf_name)

        if high_priority or item.get('priority', 2) == 1:
            priority = 1
        else:
            priority = 2

        return p.Config(name=conf_name,
                        applicable_target=target,
                        scope=scope,
                        config_type=item.get('config_type', "string"),
                        config_values=item.get('config_values', None),
                        default_value=conf_value,
                        is_optional=item.get('is_optional', True),
                        description=item.get('description', None),
                        priority=priority)
Example 20
    def _initialize(self, config):
        for configuration in self.config['configurations']:
            for service_property in configuration['properties']:
                config = p.Config(
                    service_property['name'],
                    self._get_target(service_property['applicable_target']),
                    service_property['scope'],
                    config_type=service_property['config_type'],
                    default_value=service_property['default_value'],
                    is_optional=service_property['is_optional'],
                    description=service_property['description'])

                setattr(config, 'tag', configuration['tag'].rsplit(".", 1)[0])
                self.config_items.append(config)
                #TODO(jspeidel): an assumption is made that property names
                # are unique across configuration sections which is dangerous
                property_name = service_property['name']
                # if property already exists, throw an exception
                if property_name in self.config_mapper:
                    # internal error
                    # ambari-config-resource contains duplicates
                    raise exceptions.InvalidDataException(
                        'Internal Error. Duplicate property '
                        'name detected: %s' % property_name)
                self.config_mapper[service_property['name']] = \
                    self._get_target(
                        service_property['applicable_target'])
Example 21
def _build_cluster_schema(api_version='v1'):
    if api_version == 'v1':
        cluster_schema = copy.deepcopy(ct_schema.CLUSTER_TEMPLATE_SCHEMA)
    elif api_version == 'v2':
        cluster_schema = copy.deepcopy(ct_schema.CLUSTER_TEMPLATE_SCHEMA_V2)
    else:
        raise ex.InvalidDataException('Invalid API version %s' % api_version)

    cluster_schema['properties'].update({
        "is_transient": {
            "type": "boolean"
        },
        "user_keypair_id": {
            "type": "string",
            "format": "valid_keypair_name",
        },
        "cluster_template_id": {
            "type": "string",
            "format": "uuid",
        }
    })

    if api_version == 'v2':
        cluster_schema['properties'].update({"count": {"type": "integer"}})
    return cluster_schema
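A hedged sketch of the behavioural difference between the two versions (assuming the function is imported from this module):

v1 = _build_cluster_schema('v1')
v2 = _build_cluster_schema('v2')
assert 'count' not in v1['properties']
assert 'count' in v2['properties']
_build_cluster_schema('v3')  # raises InvalidDataException: Invalid API version v3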
Example 22
        def handler(*args, **kwargs):
            # NOTE (vgridnev): We should know information about instance,
            #                  so we should find instance in args or kwargs.
            #                  Also, we import sahara.conductor.resource
            #                  to check some object is Instance

            instance = None
            for arg in args:
                if isinstance(arg, resource.InstanceResource):
                    instance = arg

            for kw_arg in kwargs.values():
                if isinstance(kw_arg, resource.InstanceResource):
                    instance = kw_arg

            if instance is None:
                raise exceptions.InvalidDataException(
                    _("Function should have an Instance as argument"))

            try:
                value = func(*args, **kwargs)
            except Exception as e:
                with excutils.save_and_reraise_exception():
                    add_fail_event(instance, e)

            if mark_successful_on_exit:
                add_successful_event(instance)

            return value
Example 23
    def get_data_source_by_url(self, url):
        url = urlparse.urlparse(url)
        if not url.scheme:
            raise ex.InvalidDataException(
                _("Data source url must have a scheme"))
        return self.get_data_source(url.scheme)
Example 24
File: api.py Project: wuhsh/sahara
def request_data():
    if hasattr(flask.request, 'parsed_data'):
        return flask.request.parsed_data

    if (flask.request.content_length is None
            or not flask.request.content_length > 0):
        LOG.debug("Empty body provided in request")
        return dict()

    if flask.request.file_upload:
        return flask.request.data

    deserializer = None
    content_type = flask.request.mimetype
    if not content_type or content_type in RT_JSON:
        deserializer = wsgi.JSONDeserializer()
    else:
        raise ex.InvalidDataException(
            _("Content type '%s' isn't supported") % content_type)

    # parsed request data to avoid unwanted re-parsings
    parsed_data = deserializer.deserialize(flask.request.data)['body']
    flask.request.parsed_data = parsed_data

    return flask.request.parsed_data
Example 25
    def validate_job_execution(self, cluster, job, data):
        if cluster.hadoop_version < "1.0.0":
            raise ex.InvalidDataException(
                _('Spark 1.0.0 or higher required to run spark %s jobs') %
                job.type)

        super(EdpEngine, self).validate_job_execution(cluster, job, data)
Example 26
def check_scheduled_job_execution_info(job_execution_info):
    start = job_execution_info.get('start', None)
    if start is None:
        raise ex.InvalidDataException(
            _("Scheduled job must specify start time"))
    try:
        start = time.strptime(start, "%Y-%m-%d %H:%M:%S")
        start = timeutils.datetime.datetime.fromtimestamp(time.mktime(start))
    except Exception:
        raise ex.InvalidDataException(_("Invalid Time Format"))

    now_time = timeutils.utcnow()

    if timeutils.delta_seconds(now_time, start) < 0:
        raise ex.InvalidJobExecutionInfoException(
            _("Job start time should be later than now"))
Example 27
def check_data_sources_are_different(data_source_1_id, data_source_2_id):
    ds1 = conductor.data_source_get(context.ctx(), data_source_1_id)
    ds2 = conductor.data_source_get(context.ctx(), data_source_2_id)

    if ds1.type == ds2.type and ds1.url == ds2.url:
        raise ex.InvalidDataException(_('Provided input and output '
                                        'DataSources reference the same '
                                        'location: %s') % ds1.url)
Example 28
def check_execution_interface(data, job):
    job_int = {arg.name: arg for arg in job.interface}
    execution_int = data.get("interface", None)

    if not (job_int or execution_int):
        return
    if job_int and execution_int is None:
        raise e.InvalidDataException(
            _("An interface was specified with the template for this job. "
              "Please pass an interface map with this job (even if empty)."))

    execution_names = set(execution_int.keys())

    definition_names = set(job_int.keys())
    not_found_names = execution_names - definition_names
    if not_found_names:
        raise e.InvalidDataException(
            _("Argument names: %s were not found in the interface for this "
              "job.") % str(list(not_found_names)))

    required_names = {arg.name for arg in job.interface if arg.required}
    unset_names = required_names - execution_names
    if unset_names:
        raise e.InvalidDataException(
            _("Argument names: %s are required for "
              "this job.") % str(list(unset_names)))

    nonexistent = object()
    for name, value in six.iteritems(execution_int):
        arg = job_int[name]
        _validate_value(arg.value_type, value)
        if arg.mapping_type == "args":
            continue
        typed_configs = data.get("job_configs", {}).get(arg.mapping_type, {})
        config_value = typed_configs.get(arg.location, nonexistent)
        if config_value is not nonexistent and config_value != value:
            args = {
                "name": name,
                "mapping_type": arg.mapping_type,
                "location": arg.location
            }
            raise e.InvalidDataException(
                _("Argument '%(name)s' was passed both through the interface "
                  "and in location '%(mapping_type)s'.'%(location)s'. Please "
                  "pass this through either the interface or the "
                  "configuration maps, not both.") % args)
Example 29
def check_data_sources_are_different(data_source_1_id, data_source_2_id):
    ds1 = api.get_data_source(data_source_1_id)
    ds2 = api.get_data_source(data_source_2_id)

    if ds1.type == ds2.type and ds1.url == ds2.url:
        raise ex.InvalidDataException('Provided input and output '
                                      'DataSources reference the same '
                                      'location: %s' % ds1.url)
Example 30
    def validate(self, data):
        self._validate_url(data['url'])

        # Do validation loosely, and don't require much... the user might have
        # (by their own preference) set some or all configs manually

        if "credentials" not in data:
            return

        for key in data["credentials"].keys():
            if key not in self.configs_map.keys():
                raise ex.InvalidDataException(
                    _("Unknown config '%s' for S3 data source") % key)
            if key in self.bool_keys:
                if not isinstance(data["credentials"][key], bool):
                    raise ex.InvalidDataException(
                        _("Config '%s' must be boolean") % key)