def smart_path(path, mapping):
    """Prepend a home directory and/or ``${nameNode}`` to ``path`` when needed.

    The logic is a bit complicated because Oozie is not consistent between
    data paths, prepare statements, coordinator paths and Fs actions:

    * A relative path (one that starts with neither ``$`` nor ``/`` nor
      ``hdfs://``) is first anchored under a ``/user/...`` home directory.
    * A path that starts with a ``${var}`` parameter gets ``${nameNode}``
      prepended only when ``var`` is present in ``mapping`` and its value
      starts with neither ``hdfs://`` nor ``$``. Looking the value up
      dynamically is what allows prepare statements to be used in a
      workflow scheduled manually or by a coordinator.
    * Any other path gets ``${nameNode}`` prepended unless it already starts
      with ``hdfs://``.

    Args:
        path: Path string, possibly starting with a ``$`` parameter.
        mapping: Dict of parameter names to the string values submitted by
            the user or the coordinator.

    Returns:
        The path string, prefixed as described above.
    """
    if not path.startswith(('$', '/', 'hdfs://')):
        path = '/user/%(username)s/%(path)s' % {'username': '******', 'path': path}

    if path.startswith('$'):
        for var in find_variables(path):
            if not path.startswith('${%s}' % var):
                continue
            if var in mapping and not mapping[var].startswith(('hdfs://', '$')):
                path = '%(nameNode)s%(path)s' % {'nameNode': '${nameNode}', 'path': path}
            # Only one variable can lead the path, so stop here. Continuing
            # would test the *rewritten* path, and a path that also references
            # ${nameNode} could then receive the prefix a second time,
            # depending on the order in which the variables are iterated.
            break
    elif not path.startswith('hdfs://'):
        path = '%(nameNode)s%(path)s' % {'nameNode': '${nameNode}', 'path': path}

    return path
def _get_external_parameters(self, xml, properties=None):
    """Collect the parameters referenced by ``xml``, plus optional overrides.

    Every variable returned by ``find_variables(xml, include_named=False)``
    starts out mapped to an empty string. When ``self._is_coordinator()`` is
    true, variables listed in ``DATASET_FREQUENCY`` are left out.

    Args:
        xml: Text that is scanned for variables.
        properties: Optional newline-separated ``key=value`` text whose
            entries are added to (and override) the scanned parameters.
            Comment lines starting with ``#`` and lines that do not contain
            exactly one ``=`` are ignored.

    Returns:
        Dict mapping each parameter name to its value.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # with oozie.models - confirm before moving it to module level.
    from oozie.models import DATASET_FREQUENCY

    is_coordinator = self._is_coordinator()
    parameters = {
        var: ''
        for var in find_variables(xml, include_named=False)
        if not is_coordinator or var not in DATASET_FREQUENCY
    }

    if properties:
        for raw_line in properties.split('\n'):
            line = raw_line.strip()
            # Test for '#' on the stripped line so that indented comment
            # lines are skipped too instead of being parsed as parameters.
            if line.startswith('#'):
                continue
            pair = line.split('=')
            if len(pair) == 2:
                key, value = pair
                parameters[key] = value

    return parameters
def make_parameterization_form(query_str):
    """
    Creates a django form on the fly with arguments from the query.

    Returns a ``forms.Form`` subclass holding one required ``CharField`` per
    variable found in ``query_str`` (fields are created in sorted name
    order), or ``None`` when the query contains no variables.
    """
    variables = find_variables(query_str)
    if not variables:
        return None

    # The class is built with type() instead of assigning into locals() from
    # a loop inside the class body: there the loop variable itself lives in
    # the class namespace, so a query variable called ``name`` had its field
    # overwritten by the next loop iteration and silently went missing.
    fields = {name: forms.CharField(required=True) for name in sorted(variables)}
    return type('Form', (forms.Form,), fields)
def get_parameterization(request, query_str, query_form, design, is_explain):
    """
    Figures out whether a design is parameterizable, and, if so,
    returns a form to fill out.

    The query is only inspected when ``query_form.cleaned_data`` has a truthy
    ``"is_parameterized"`` entry. If ``make_parameterization_form`` then
    produces a form class, the ``parameterization.mako`` template is rendered
    with an instance of it (prefix ``"parameterization"``), the design and
    the explain flag.

    Returns None if there's no parameterization to do.
    """
    if not query_form.cleaned_data["is_parameterized"]:
        return None

    # make_parameterization_form scans the query for variables itself, so no
    # separate find_variables call is needed here.
    form = make_parameterization_form(query_str)
    if not form:
        return None

    context = {
        "form": form(prefix="parameterization"),
        "design": design,
        "explain": is_explain,
    }
    return render("parameterization.mako", request, context)
def _parameterization_form(data):
    """
    Returns a Django form appropriate to parameterizing data.

    The returned ``forms.Form`` subclass has one required ``CharField`` per
    variable found in ``data``. The ``input`` and ``output`` variables, when
    present, come first and carry help text; the remaining variables follow
    in sorted name order.

    NOTE(review): relies on ``find_variables`` returning a set (``difference``
    is called on its result).
    """
    variables = find_variables(data)
    fields = {}

    # These are special-cased, since we have help-text available for them.
    if "input" in variables:
        fields["input"] = forms.CharField(required=True, help_text="Path to input.")
    if "output" in variables:
        fields["output"] = forms.CharField(required=True, help_text="Must be a non-existant directory.")

    # Exclude the special-cased names so their help-text fields are not
    # replaced by plain ones (the exclusion list used to misspell "input").
    for name in sorted(variables.difference({"input", "output"})):
        fields[name] = forms.CharField(required=True)

    # type() keeps helper names such as the loop variable out of the class
    # namespace, so a variable literally called ``name`` keeps its field.
    return type("Form", (forms.Form,), fields)
def _get_external_parameters(self, xml, properties=None):
    """Collect the parameters referenced by ``xml``, plus optional overrides.

    Every variable returned by ``find_variables(xml)`` starts out mapped to
    an empty string. When ``self._is_coordinator()`` is true, variables
    listed in ``DATASET_FREQUENCY`` are left out.

    Args:
        xml: Text that is scanned for variables.
        properties: Optional newline-separated ``key=value`` text whose
            entries are added to (and override) the scanned parameters.
            Comment lines starting with ``#`` and lines that do not contain
            exactly one ``=`` are ignored.

    Returns:
        Dict mapping each parameter name to its value.
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # with oozie.models - confirm before moving it to module level.
    from oozie.models import DATASET_FREQUENCY

    is_coordinator = self._is_coordinator()
    parameters = {
        var: ""
        for var in find_variables(xml)
        if not is_coordinator or var not in DATASET_FREQUENCY
    }

    if properties:
        for raw_line in properties.split("\n"):
            line = raw_line.strip()
            # Test for "#" on the stripped line so that indented comment
            # lines are skipped too instead of being parsed as parameters.
            if line.startswith("#"):
                continue
            pair = line.split("=")
            if len(pair) == 2:
                key, value = pair
                parameters[key] = value

    return parameters
def smart_path(path, mapping):
    """Prepend a home directory and/or ``${nameNode}`` to ``path`` when needed.

    The logic is a bit complicated because Oozie is not consistent between
    data paths, prepare statements, coordinator paths and Fs actions:

    * A relative path (one that starts with neither ``$`` nor ``/`` and has
      no URL scheme) is first anchored under a ``/user/...`` home directory.
    * A path that starts with a ``${var}`` parameter gets ``${nameNode}``
      prepended only when ``var`` is present in ``mapping`` and its value
      has no URL scheme and does not start with ``$``. Looking the value up
      dynamically is what allows prepare statements to be used in a
      workflow scheduled manually or by a coordinator.
    * Any other path gets ``${nameNode}`` prepended unless it already carries
      a URL scheme.

    Args:
        path: Path string, possibly starting with a ``$`` parameter.
        mapping: Dict of parameter names to the string values submitted by
            the user or the coordinator.

    Returns:
        The path string, prefixed as described above.
    """
    if not path.startswith(('$', '/')) and not urlparse.urlsplit(path).scheme:
        path = '/user/%(username)s/%(path)s' % {'username': '******', 'path': path}

    if path.startswith('$'):
        for var in find_variables(path):
            if not path.startswith('${%s}' % var):
                continue
            if var in mapping:
                value = mapping[var]
                if not urlparse.urlsplit(value).scheme and not value.startswith('$'):
                    path = '%(nameNode)s%(path)s' % {'nameNode': '${nameNode}', 'path': path}
            # Only one variable can lead the path, so stop here. Continuing
            # would test the *rewritten* path, and a path that also references
            # ${nameNode} could then receive the prefix a second time,
            # depending on the order in which the variables are iterated.
            break
    elif not urlparse.urlsplit(path).scheme:
        path = '%(nameNode)s%(path)s' % {'nameNode': '${nameNode}', 'path': path}

    return path
def contains_symlink(path, mapping):
    """Tell whether ``path``, or the value of a variable it uses, contains ``#``.

    NOTE(review): going by the function name, ``#`` is taken to be the
    symlink marker in a path - confirm against the callers.

    Args:
        path: Path string that may reference variables.
        mapping: Dict of variable names to string values. Variables that are
            not in the mapping are ignored.

    Returns:
        True when ``#`` occurs in ``path`` itself or in the mapped value of
        any variable found in ``path``; False otherwise.
    """
    # Cheap literal check first: no variable scan is needed when it matches.
    if '#' in path:
        return True
    return any(
        var in mapping and '#' in mapping[var]
        for var in find_variables(path)
    )
def test_find_variables():
    # Variables are searched for in plain strings as well as inside nested
    # dicts and lists. The expectation below also pins that a repeated
    # variable is reported once and that '$$' contributes no name.
    data = {
        'one': '$a',
        'two': {'c': 'foo $b $$'},
        'three': ['${foo}', 'xxx ${foo}'],
    }
    expected = {'a', 'b', 'foo'}

    assert_equal(expected, find_variables(data))
def test_find_variables():
    # Variables are searched for in plain strings as well as inside nested
    # dicts and lists. The expectation below also pins that a repeated
    # variable is reported once and that "$$" contributes no name.
    data = {
        "one": "$a",
        "two": {"c": "foo $b $$"},
        "three": ["${foo}", "xxx ${foo}"],
    }
    expected = {"a", "b", "foo"}

    assert_equal(expected, find_variables(data))