Ejemplo n.º 1
0
Archivo: shell.py Proyecto: druths/xp
	def run(self,arg_str,context,cwd,content):
		"""
		Execute the block's content as a shell script.

		The lines in ``content`` are joined with newlines and passed to
		``subprocess.call`` with ``shell=True``, using ``cwd`` as the working
		directory and ``get_total_context(context)`` as the environment.

		``arg_str`` is not used by this block; a warning is logged when it is
		not blank.

		Raises a CalledProcessError if the command exits with a non-zero status.
		"""

		if arg_str.strip():
			# Logger.warn is a deprecated alias of Logger.warning
			logger.warning('shell block ignoring argument string: %s',arg_str)

		cmd = '\n'.join(content)
		retcode = subprocess.call(cmd,shell=True,cwd=cwd,env=get_total_context(context))

		if retcode != 0:
			raise CalledProcessError(retcode,cmd,None)
Ejemplo n.º 2
0
    def run(self, arg_str, context, cwd, content):
        """
        Execute the block's content as a shell script.

        The lines in ``content`` are joined with newlines and passed to
        ``subprocess.call`` with ``shell=True``, using ``cwd`` as the working
        directory and ``get_total_context(context)`` as the environment.

        ``arg_str`` is not used by this block; a warning is logged when it is
        not blank.

        Raises a CalledProcessError if the command exits with a non-zero status.
        """

        if arg_str.strip():
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning('shell block ignoring argument string: %s', arg_str)

        cmd = '\n'.join(content)
        retcode = subprocess.call(cmd,
                                  shell=True,
                                  cwd=cwd,
                                  env=get_total_context(context))

        if retcode != 0:
            raise CalledProcessError(retcode, cmd, None)
Ejemplo n.º 3
0
    def run(self,arg_str,context,cwd,content):
        """
        Execute the block's content as a Python script.

        The lines in ``content`` are joined with newlines and written to a
        temporary ``.py`` file, which is then run through the shell as
        ``<PYTHON_CMD> <arg_str> <tmp file>``.  ``PYTHON_CMD`` is read from
        ``context`` and defaults to ``python``.  The command runs in ``cwd``
        with ``get_total_context(context)`` as its environment.  The
        temporary file is removed once the command has finished.

        Raises a CalledProcessError if the command exits with a non-zero status.
        """

        # write python code to a tmp file
        fh,tmp_filename = tempfile.mkstemp(suffix='.py')
        try:
            # fdopen takes ownership of the descriptor, so it is closed even
            # when the write fails
            with os.fdopen(fh,'wb') as script:
                script.write('\n'.join(content).encode())

            logger.debug('wrote python content to %s',tmp_filename)

            exec_name = context.get('PYTHON_CMD','python')
            cmd = '%s %s %s' % (exec_name,arg_str,tmp_filename)
            logger.debug('using cmd: %s',cmd)
            retcode = subprocess.call(cmd,shell=True,cwd=cwd,env=get_total_context(context))
        finally:
            # mkstemp never deletes the file itself; it is only needed while
            # the interpreter runs
            os.remove(tmp_filename)

        if retcode != 0:
            raise CalledProcessError(retcode,cmd,None)
Ejemplo n.º 4
0
def expand_variables(x,context,cwd,pipelines,source_file,lineno,nested=False):
    """
    Expand escapes, variable references and inline function calls in one
    line of text.

    The string is scanned left to right and rewritten as it goes:

    * a backslash followed by a character in SUPPORTED_ESCAPABLE_CHARACTERS
      is replaced by that character, which is then treated as a literal;
    * ``$name`` / ``${name}`` is replaced by ``context[name]``;
      ``${alias.name}`` is looked up in ``pipelines[alias].get_context()``;
    * ``$(...)`` runs its (already expanded) argument text through the shell
      and substitutes the output, minus one trailing newline;
    * ``$PLN(fname)`` / ``$PLN(alias,fname)`` substitutes a pipeline prefix
      followed by ``fname``.

    Substituted text is not scanned again.

    Parameters:
        x: the text to expand.
        context: mapping of variable names to values; also passed to
            ``get_total_context`` to build the environment of inline shell
            calls.
        cwd: working directory for inline shell calls.
        pipelines: mapping of pipeline aliases to objects providing
            ``get_prefix()`` and ``get_context()``.
        source_file, lineno: location reported in raised exceptions.
        nested: True when expanding the argument list of a function; the
            scan then stops at the first closing parenthesis it reaches.

    Returns the expanded string.  When ``nested`` is True, returns a tuple
    ``(x, cpos)`` where ``cpos`` is the index of the closing parenthesis
    in ``x``.

    ParseException is raised if syntax is bad.
    UnknownVariableException is raised if variables or functions can't be resolved.
    A plain Exception is raised for an unknown $PLN alias, for too many $PLN
    arguments and for inline shell output containing newlines.
    """

    cpos = 0

    while cpos < len(x):

        # handle escaping special character
        if x[cpos] == '\\':
            if cpos == len(x)-1:
                raise ParseException(source_file,lineno,'incomplete escape sequence at EOL')

            c = x[cpos+1]

            if c not in SUPPORTED_ESCAPABLE_CHARACTERS:
                raise ParseException(source_file,lineno,'invalid escape sequence \\%s' % c)

            # drop the backslash, keeping only the escaped character
            x = x[:cpos] + c + x[(cpos+2):]

            # the literal now sits at cpos, so scanning resumes right after
            # it; advancing by 2 would skip the character that follows
            cpos = cpos+1

        elif x[cpos] == '$':
            # variable started!
            if cpos == len(x)-1:
                raise ParseException(source_file,lineno,'incomplete variable reference')

            # get the variable name
            m = variable_pattern.match(x[(cpos+1):])
            if m is None:
                # check if this is a shell call
                if cpos < len(x)-1 and x[cpos+1] == '(':
                    varname = ''
                else:
                    raise ParseException(source_file,lineno,'invalid variable reference')
            else:
                varname = None

                # this is a curly-brace-delimited variable
                if m.group(1) is None:
                    varname = m.group(2)

                    # remove curly braces
                    varname = varname[1:-1]

                    # remove the curlies from the string x
                    x = x[:(cpos+1)] + varname + x[(cpos+len(varname)+3):]
                else: # not delimited by curly braces
                    varname = m.group(1)

            # if this variable reference is actually a function
            fxn_paren_pos = cpos+1+len(varname)
            if fxn_paren_pos < (len(x)-1) and x[fxn_paren_pos] == '(':
                fxn_argstart_pos = fxn_paren_pos + 1
                # we only support two functions
                if varname not in SUPPORTED_BUILTIN_FUNCTIONS:
                    raise UnknownVariableException(source_file,lineno,'invalid builtin function name: %s' % varname)

                # process the rest of the string; the nested call returns the
                # expanded text and the index of the closing parenthesis in it
                expanded_x_part,eofxn = expand_variables(x[fxn_argstart_pos:],context,cwd,pipelines,source_file,lineno,nested=True)

                x = x[:fxn_argstart_pos] + expanded_x_part
                eofxn = fxn_argstart_pos + eofxn

                # extract arguments (the loop variable must not reuse the name x)
                args_str = x[fxn_argstart_pos:eofxn]
                args = [arg.strip() for arg in args_str.split(',')]
                logger.debug('got fxn args: %s',args)

                # apply the function
                ret_val = ''
                if varname == '':
                    ret_val = subprocess.check_output(args_str,shell=True,cwd=cwd,
                                                      env=get_total_context(context))

                    # convert the bytes into a string
                    ret_val = ret_val.decode()

                    # strip one trailing newline; endswith also copes with
                    # commands that print nothing at all
                    if ret_val.endswith('\n'):
                        ret_val = ret_val[:-1]

                    logger.debug('expanded shell fxn to: %s',ret_val)

                    if '\n' in ret_val:
                        raise Exception('inline shell functions cannot return strings containing newlines: %s' % ret_val)

                elif varname == 'PLN':
                    prefix = None

                    if len(args) == 1:
                        prefix = context[PIPELINE_PREFIX_VARNAME]
                        fname = args[0]
                    elif len(args) == 2:
                        pln_name = args[0]

                        if pln_name not in pipelines:
                            raise Exception('unable to find pipeline with alias "%s"' % pln_name)

                        prefix = pipelines[pln_name].get_prefix()
                        logger.debug('PLN reference got prefix = %s',prefix)
                        fname = args[1]
                    else:
                        # TODO: Add line number
                        raise Exception('too many arguments for $PLN(...) fxn')

                    ret_val = '%s%s' % (prefix,fname)

                # make the replacement
                pre_fxn = x[:cpos]
                post_fxn = x[(eofxn+1):]
                x = pre_fxn + ret_val + post_fxn
                cpos = len(pre_fxn) + len(ret_val)

            else:
                # length of the reference text after '$', measured before a
                # pipeline qualifier is split off the name
                ref_len = len(varname)

                # figure out which context to use
                var_context = context
                if '.' in varname:
                    pln_name, varname = varname.split('.')

                    if pln_name not in pipelines:
                        raise UnknownVariableException(source_file,lineno,'pipeline %s is unknown' % pln_name)

                    var_context = pipelines[pln_name].get_context()

                if varname not in var_context:
                    raise UnknownVariableException(source_file,lineno,'variable %s does not exist' % varname)

                replacement = var_context[varname]

                # make the replacement
                pre_var = x[:cpos]
                post_var = x[(cpos+1+ref_len):]
                x = pre_var + replacement + post_var

                # resume at the first character after the substituted value
                cpos = cpos+len(replacement)

        elif nested and x[cpos] == ')':
            # We just found the end of a function (which we're nested inside of)
            return x,cpos

        else:
            cpos += 1

    # under normal circumstances, reaching the end of the string is what we want.
    # but if we are nested, then we should find a parenthesis first.
    if nested:
        raise ParseException(source_file,lineno,'expected to find a ")", none found')

    return x
Ejemplo n.º 5
0
    def run(self,arg_str,context,cwd,content):
        """
        Run the block's content as a Python map-reduce job.

        ``content`` is written to two temporary scripts: a mapper that ends
        by calling ``map(sys.stdin)`` and a reducer that ends by calling
        ``reduce(sys.stdin)``.  Both scripts are removed once the job command
        has finished.  The job is configured through these ``context``
        entries:

        * ``PYHMR_HADOOP_CMD`` (default ``hadoop``) and ``PYHMR_PYTHON_CMD``
          (default ``python``): the executables to invoke.
        * ``PYHMR_STREAMING_API_JAR``, ``PYHMR_INPUT``, ``PYHMR_OUTPUT``: the
          jar, ``-input`` and ``-output`` values used in normal mode.
        * ``PYHMR_EXTRA_FILES``: extra entry appended to the ``-files`` list.
        * ``PYHMR_NUM_REDUCERS``: value for ``mapred.reduce.tasks``.
        * ``PYHMR_TEST_CMD``: when set, the job runs in test mode as the
          shell pipe ``test_cmd | mapper | reducer`` instead of via hadoop.
        * ``PYHMR_TEST_OUTPUT``: file the test-mode pipe is redirected to.

        ``arg_str`` is not used.  The command runs in ``cwd`` with
        ``get_total_context(context)`` as its environment.

        Raises a CalledProcessError if this fails.
        """
        # get configuration
        HADOOP_CMD_EV = 'PYHMR_HADOOP_CMD'
        PYTHON_CMD_EV = 'PYHMR_PYTHON_CMD'
        STREAMING_API_JAR_EV = 'PYHMR_STREAMING_API_JAR'
        INPUT_EV = 'PYHMR_INPUT'
        OUTPUT_EV = 'PYHMR_OUTPUT'
        EXTRA_FILES_EV = 'PYHMR_EXTRA_FILES'
        NUM_REDUCERS_EV = 'PYHMR_NUM_REDUCERS'
        TEST_CMD_EV = 'PYHMR_TEST_CMD'
        TEST_OUTPUT_EV = 'PYHMR_TEST_OUTPUT'

        hadoop_cmd = context.get(HADOOP_CMD_EV,'hadoop')
        python_cmd = context.get(PYTHON_CMD_EV,'python')

        streaming_api_jar = context.get(STREAMING_API_JAR_EV)
        input_location = context.get(INPUT_EV)
        output_location = context.get(OUTPUT_EV)

        extra_files = context.get(EXTRA_FILES_EV,'')
        num_reducers = context.get(NUM_REDUCERS_EV,None)

        test_cmd = context.get(TEST_CMD_EV,None)
        test_output = context.get(TEST_OUTPUT_EV,None)
        is_test = test_cmd is not None

        # every temporary script created below, so all of them get cleaned up
        script_files = []

        def write_script(entry_point):
            # Write content plus a __main__ stub that feeds stdin to entry_point.
            fh,filename = tempfile.mkstemp(suffix='.py')
            script_files.append(filename)
            # fdopen takes ownership of the descriptor, so it is closed even
            # when a write fails
            with os.fdopen(fh,'wb') as script:
                script.write('\n'.join(content).encode())
                script.write(("\n\nif __name__ == '__main__':\n"
                              "    import sys\n"
                              "    %s(sys.stdin)\n" % entry_point).encode())
            return filename

        try:
            # make the mapper file
            mapper_filename = write_script('map')
            logger.debug('wrote mapper to %s',mapper_filename)

            # make the reducer file
            reducer_filename = write_script('reduce')
            logger.debug('wrote reducer to %s',reducer_filename)

            # switch behavior conditional on testing
            if is_test:
                logger.warning('running map-reduce task in test mode')

                cmd = '%s | %s %s | %s %s' % (test_cmd,python_cmd,mapper_filename,python_cmd,reducer_filename)

                if test_output is not None:
                    cmd += ' > %s' % test_output
            else:
                logger.info('running map-reduce task in normal mode')

                # leave out an empty extra_files entry so the -files list
                # does not end in a dangling comma
                shipped_files = [f for f in (mapper_filename,reducer_filename,extra_files) if f]

                cmd = '%s jar %s' % (hadoop_cmd,streaming_api_jar)
                cmd += ' -input "%s" -output "%s"' % (input_location,output_location)
                cmd += ' -mapper "%s %s" -reducer "%s %s"' % (python_cmd,mapper_filename,python_cmd,reducer_filename)
                cmd += ' -files "%s"' % ','.join(shipped_files)
                if num_reducers is not None:
                    cmd += ' -D mapred.reduce.tasks=%s' % num_reducers

            logger.debug('using cmd: %s',cmd)
            retcode = subprocess.call(cmd,shell=True,cwd=cwd,env=get_total_context(context))
        finally:
            # best-effort cleanup: a failed removal must not hide the job result
            for filename in script_files:
                try:
                    os.remove(filename)
                except OSError:
                    logger.debug('could not remove temporary script %s',filename)

        if retcode != 0:
            raise CalledProcessError(retcode,cmd,None)
Ejemplo n.º 6
0
def expand_variables(x,
                     context,
                     cwd,
                     pipelines,
                     source_file,
                     lineno,
                     nested=False):
    """
    Expand escapes, variable references and inline function calls in one
    line of text.

    The string is scanned left to right and rewritten as it goes:

    * a backslash followed by a character in SUPPORTED_ESCAPABLE_CHARACTERS
      is replaced by that character, which is then treated as a literal;
    * ``$name`` / ``${name}`` is replaced by ``context[name]``;
      ``${alias.name}`` is looked up in ``pipelines[alias].get_context()``;
    * ``$(...)`` runs its (already expanded) argument text through the shell
      and substitutes the output, minus one trailing newline;
    * ``$PLN(fname)`` / ``$PLN(alias,fname)`` substitutes a pipeline prefix
      followed by ``fname``.

    Substituted text is not scanned again.

    Parameters:
        x: the text to expand.
        context: mapping of variable names to values; also passed to
            ``get_total_context`` to build the environment of inline shell
            calls.
        cwd: working directory for inline shell calls.
        pipelines: mapping of pipeline aliases to objects providing
            ``get_prefix()`` and ``get_context()``.
        source_file, lineno: location reported in raised exceptions.
        nested: True when expanding the argument list of a function; the
            scan then stops at the first closing parenthesis it reaches.

    Returns the expanded string.  When ``nested`` is True, returns a tuple
    ``(x, cpos)`` where ``cpos`` is the index of the closing parenthesis
    in ``x``.

    ParseException is raised if syntax is bad.
    UnknownVariableException is raised if variables or functions can't be resolved.
    A plain Exception is raised for an unknown $PLN alias, for too many $PLN
    arguments and for inline shell output containing newlines.
    """

    cpos = 0

    while cpos < len(x):

        # handle escaping special character
        if x[cpos] == '\\':
            if cpos == len(x) - 1:
                raise ParseException(source_file, lineno,
                                     'incomplete escape sequence at EOL')

            c = x[cpos + 1]

            if c not in SUPPORTED_ESCAPABLE_CHARACTERS:
                raise ParseException(source_file, lineno,
                                     'invalid escape sequence \\%s' % c)

            # drop the backslash, keeping only the escaped character
            x = x[:cpos] + c + x[(cpos + 2):]

            # the literal now sits at cpos, so scanning resumes right after
            # it; advancing by 2 would skip the character that follows
            cpos = cpos + 1

        elif x[cpos] == '$':
            # variable started!
            if cpos == len(x) - 1:
                raise ParseException(source_file, lineno,
                                     'incomplete variable reference')

            # get the variable name
            m = variable_pattern.match(x[(cpos + 1):])
            if m is None:
                # check if this is a shell call
                if cpos < len(x) - 1 and x[cpos + 1] == '(':
                    varname = ''
                else:
                    raise ParseException(source_file, lineno,
                                         'invalid variable reference')
            else:
                varname = None

                # this is a curly-brace-delimited variable
                if m.group(1) is None:
                    varname = m.group(2)

                    # remove curly braces
                    varname = varname[1:-1]

                    # remove the curlies from the string x
                    x = x[:(cpos + 1)] + varname + x[(cpos + len(varname) +
                                                      3):]
                else:  # not delimited by curly braces
                    varname = m.group(1)

            # if this variable reference is actually a function
            fxn_paren_pos = cpos + 1 + len(varname)
            if fxn_paren_pos < (len(x) - 1) and x[fxn_paren_pos] == '(':
                fxn_argstart_pos = fxn_paren_pos + 1
                # we only support two functions
                if varname not in SUPPORTED_BUILTIN_FUNCTIONS:
                    raise UnknownVariableException(
                        source_file, lineno,
                        'invalid builtin function name: %s' % varname)

                # process the rest of the string; the nested call returns the
                # expanded text and the index of the closing parenthesis in it
                expanded_x_part, eofxn = expand_variables(x[fxn_argstart_pos:],
                                                          context,
                                                          cwd,
                                                          pipelines,
                                                          source_file,
                                                          lineno,
                                                          nested=True)

                x = x[:fxn_argstart_pos] + expanded_x_part
                eofxn = fxn_argstart_pos + eofxn

                # extract arguments (the loop variable must not reuse the name x)
                args_str = x[fxn_argstart_pos:eofxn]
                args = [arg.strip() for arg in args_str.split(',')]
                logger.debug('got fxn args: %s', args)

                # apply the function
                ret_val = ''
                if varname == '':
                    ret_val = subprocess.check_output(
                        args_str,
                        shell=True,
                        cwd=cwd,
                        env=get_total_context(context))

                    # convert the bytes into a string
                    ret_val = ret_val.decode()

                    # strip one trailing newline; endswith also copes with
                    # commands that print nothing at all
                    if ret_val.endswith('\n'):
                        ret_val = ret_val[:-1]

                    logger.debug('expanded shell fxn to: %s', ret_val)

                    if '\n' in ret_val:
                        raise Exception(
                            'inline shell functions cannot return strings containing newlines: %s'
                            % ret_val)

                elif varname == 'PLN':
                    prefix = None

                    if len(args) == 1:
                        prefix = context[PIPELINE_PREFIX_VARNAME]
                        fname = args[0]
                    elif len(args) == 2:
                        pln_name = args[0]

                        if pln_name not in pipelines:
                            raise Exception(
                                'unable to find pipeline with alias "%s"' %
                                pln_name)

                        prefix = pipelines[pln_name].get_prefix()
                        logger.debug('PLN reference got prefix = %s', prefix)
                        fname = args[1]
                    else:
                        # TODO: Add line number
                        raise Exception('too many arguments for $PLN(...) fxn')

                    ret_val = '%s%s' % (prefix, fname)

                # make the replacement
                pre_fxn = x[:cpos]
                post_fxn = x[(eofxn + 1):]
                x = pre_fxn + ret_val + post_fxn
                cpos = len(pre_fxn) + len(ret_val)

            else:
                # length of the reference text after '$', measured before a
                # pipeline qualifier is split off the name
                ref_len = len(varname)

                # figure out which context to use
                var_context = context
                if '.' in varname:
                    pln_name, varname = varname.split('.')

                    if pln_name not in pipelines:
                        raise UnknownVariableException(
                            source_file, lineno,
                            'pipeline %s is unknown' % pln_name)

                    var_context = pipelines[pln_name].get_context()

                if varname not in var_context:
                    raise UnknownVariableException(
                        source_file, lineno,
                        'variable %s does not exist' % varname)

                replacement = var_context[varname]

                # make the replacement
                pre_var = x[:cpos]
                post_var = x[(cpos + 1 + ref_len):]
                x = pre_var + replacement + post_var

                # resume at the first character after the substituted value
                cpos = cpos + len(replacement)

        elif nested and x[cpos] == ')':
            # We just found the end of a function (which we're nested inside of)
            return x, cpos

        else:
            cpos += 1

    # under normal circumstances, reaching the end of the string is what we want.
    # but if we are nested, then we should find a parenthesis first.
    if nested:
        raise ParseException(source_file, lineno,
                             'expected to find a ")", none found')

    return x