def determine_order_disposition(proc_cfg, username):
    """Accomplishes order dispossition tasks

      Interact with the web service to accomplish order dispossition tasks
      along with sending the initial emails out to the users after their
      order has been accepted.
    """

    # Get the logger for this task
    logger = logging.getLogger(LOGGER_NAME)

    rpcurl = proc_cfg.get('processing', 'espa_api')
    server = None

    # Create a server object if the rpcurl seems valid
    if (rpcurl is not None and
            rpcurl.startswith('http://') and
            len(rpcurl) > 7):

        server = api_interface.api_connect(rpcurl)
    else:
        raise Exception('Missing or invalid API URL')

    # Verify the API is up
    if server is None:
        raise Exception('API server was None... exiting')

    # Use order_disposition_enabled to determine if we should be processing
    # or not
    od_enabled = server.get_configuration('system.order_disposition_enabled')

    if od_enabled.lower() != 'true':
        raise Exception('order disposition disabled... exiting')

    try:
        if not server.handle_orders(username):
            raise Exception('server.handle_orders() was not successful')

    except api_interface.APIException:
        logger.exception('A protocol error occurred')

    except Exception:
        logger.exception('An error occurred finalizing orders')

    finally:
        server = None
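

# A minimal usage sketch (illustrative only, not part of the original module).
# The caller is assumed to load processing.conf into a ConfigParser-style
# object (retrieve_cfg is used that way elsewhere in this codebase) and pass
# it in together with the username whose orders should be finalized:
#
#     proc_cfg = retrieve_cfg('processing.conf')
#     determine_order_disposition(proc_cfg, username='espa_cron')

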
def main():
    logger = logging.getLogger(__name__)  # Get logger for the module.

    # get the command line arguments
    parser = OptionParser()
    parser.add_option('-s', '--start_year', type='int', dest='syear',
        default=0, help='year for which to start pulling LAADS data')
    parser.add_option('-e', '--end_year', type='int', dest='eyear',
        default=0, help='last year for which to pull LAADS data')
    parser.add_option('--today', dest='today', default=False,
        action='store_true',
        help='process LAADS data up through the most recent year and DOY')
    msg = ('process or reprocess all LAADS data from today back to {}'
           .format(START_YEAR))
    parser.add_option('--quarterly', dest='quarterly', default=False,
        action='store_true', help=msg)

    (options, args) = parser.parse_args()
    syear = options.syear           # starting year
    eyear = options.eyear           # ending year
    today = options.today           # process most recent year of data
    quarterly = options.quarterly   # process today back to START_YEAR

    # check the arguments
    if not today and not quarterly and (syear == 0 or eyear == 0):
        msg = ('Invalid command line argument combination.  Type --help '
               'for more information.')
        logger.error(msg)
        return ERROR

    # determine the auxiliary directory to store the data
    auxdir = os.environ.get('L8_AUX_DIR')
    if auxdir is None:
        msg = 'L8_AUX_DIR environment variable not set... exiting'
        logger.error(msg)
        return ERROR

    # Get the application token for the LAADS https interface.  For ESPA
    # systems, pull the token from the config file.
    if TOKEN is None:
        # ESPA Processing Environment
        # Read ~/.usgs/espa/processing.conf to get the URL for the ESPA API.
        # Connect to the ESPA API and get the application token for downloading
        # the LAADS data from the internal database.
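
        # Assumed layout of processing.conf (values illustrative only); the
        # only option read below is espa_api in the [processing] section:
        #
        #     [processing]
        #     espa_api = http://localhost:1234/api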
        PROC_CFG_FILENAME = 'processing.conf'
        proc_cfg = retrieve_cfg(PROC_CFG_FILENAME)
        rpcurl = proc_cfg.get('processing', 'espa_api')
        server = api_connect(rpcurl)
        token = None
        if server:
            token = server.get_configuration('aux.downloads.laads.token')
    else:
        # Non-ESPA processing.  TOKEN needs to be defined at the top of this
        # script.
        token = TOKEN

    if token is None:
        logger.error('Application token is None. This needs to be a valid '
            'token provided for accessing the LAADS data. '
            'https://ladsweb.modaps.eosdis.nasa.gov/tools-and-services/data-download-scripts/')
        return ERROR

    # if processing today then process the current year.  if the current
    # DOY is within the first month, then process the previous year as well
    # to make sure we have all the recently available data processed.
    if today:
        msg = 'Processing LAADS data up to the most recent year and DOY.'
        logger.info(msg)
        now = datetime.datetime.now()
        day_of_year = now.timetuple().tm_yday
        eyear = now.year
        if day_of_year <= 31:
            syear = eyear - 1
        else:
            syear = eyear

    elif quarterly:
        msg = 'Processing LAADS data back to {}'.format(START_YEAR)
        logger.info(msg)
        eyear = datetime.datetime.now().year
        syear = START_YEAR

    msg = 'Processing LAADS data for {} - {}'.format(syear, eyear)
    logger.info(msg)
    for yr in range(eyear, syear-1, -1):
        msg = 'Processing year: {}'.format(yr)
        logger.info(msg)
        status = getLadsData(auxdir, yr, today, token)
        if status == ERROR:
            msg = ('Problems occurred while processing LAADS data for year {}'
                   .format(yr))
            logger.error(msg)
            return ERROR

    msg = 'LAADS processing complete.'
    logger.info(msg)
    return SUCCESS
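

# Example invocations (the script name here is hypothetical; the options
# match the OptionParser definitions in main() above):
#
#     python update_laads_data.py --today
#     python update_laads_data.py --quarterly
#     python update_laads_data.py -s 2018 -e 2020

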
def process(proc_cfg, developer_sleep_mode=False):
    """Read all lines from STDIN and process them

    Each line is converted to a JSON dictionary of the parameters for
    processing.  Validation is performed on the JSON dictionary to test if
    valid for this mapper.  After validation the generation of the products
    is performed.
    """

    # Initially set to the base logger
    logger = EspaLogging.get_logger('base')

    processing_location = socket.gethostname()

    # Process each line from stdin
    for line in sys.stdin:
        if not line or '{' not in line:
            # this is how the NLineInputFormat supplies values:
            # 341104        {"orderid":
            # logger.info('BAD LINE:{}##'.format(line))
            continue
        else:
            # take the entry starting at the first opening brace to the end
            line = line[line.find('{'):]
            line = line.strip()

        # Reset these for each line
        (server, order_id, product_id) = (None, None, None)

        start_time = datetime.datetime.now()

        # Initialize so that we don't sleep
        dont_sleep = True

        try:
            line = line.replace('#', '')
            parms = json.loads(line)

            if not parameters.test_for_parameter(parms, 'options'):
                raise ValueError('Error missing JSON [options] record')

            # TODO scene will be replaced with product_id someday
            (order_id, product_id, product_type, options) = \
                (parms['orderid'], parms['scene'], parms['product_type'],
                 parms['options'])

            if product_id != 'plot':
                # Developer mode is always false unless you are a developer
                # so sleeping will always occur for non-plotting requests
                # Override with the developer mode
                dont_sleep = developer_sleep_mode

            # Fix the orderid in case it contains any single quotes
            # The processors cannot handle single quotes in the email
            # portion due to usage in command lines.
            parms['orderid'] = order_id.replace("'", '')

            # If it is missing due to above TODO, then add it
            if not parameters.test_for_parameter(parms, 'product_id'):
                parms['product_id'] = product_id

            # Figure out if debug level logging was requested
            debug = False
            if parameters.test_for_parameter(options, 'debug'):
                debug = options['debug']

            # Configure and get the logger for this order request
            EspaLogging.configure(settings.PROCESSING_LOGGER, order=order_id,
                                  product=product_id, debug=debug)
            logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

            logger.info('Processing {}:{}'.format(order_id, product_id))

            # Update the status in the database
            if parameters.test_for_parameter(parms, 'espa_api'):
                if parms['espa_api'] != 'skip_api':
                    server = api_interface.api_connect(parms['espa_api'])
                    if server is not None:
                        status = server.update_status(product_id, order_id,
                                                      processing_location,
                                                      'processing')
                        if not status:
                            logger.warning('Failed processing API call'
                                           ' to update_status to processing')

            if product_id != 'plot':
                # Make sure we can process the sensor
                tmp_info = sensor.info(product_id)
                del tmp_info

                # Make sure we have a valid output format
                if not parameters.test_for_parameter(options, 'output_format'):
                    logger.warning('[output_format] parameter missing,'
                                   ' defaulting to envi')
                    options['output_format'] = 'envi'

                if (options['output_format']
                        not in parameters.VALID_OUTPUT_FORMATS):

                    raise ValueError('Invalid Output format {}'
                                     .format(options['output_format']))

            # ----------------------------------------------------------------
            # NOTE: The first thing the product processor does during
            #       initialization is validate the input parameters.
            # ----------------------------------------------------------------

            destination_product_file = 'ERROR'
            destination_cksum_file = 'ERROR'
            pp = None
            try:
                # All processors are implemented in the processor module
                pp = processor.get_instance(proc_cfg, parms)
                (destination_product_file, destination_cksum_file) = \
                    pp.process()

            finally:
                # Free disk space to be nice to the whole system.
                if pp is not None:
                    pp.remove_product_directory()

            # Sleep the number of seconds for minimum request duration
            sleep(get_sleep_duration(proc_cfg, start_time, dont_sleep))

            archive_log_files(order_id, product_id)

            # Everything was successful so mark the scene complete
            if server is not None:
                status = server.mark_scene_complete(product_id, order_id,
                                                    processing_location,
                                                    destination_product_file,
                                                    destination_cksum_file,
                                                    '')
                if not status:
                    logger.warning('Failed processing API call to'
                                   ' mark_scene_complete')

        except Exception as excep:

            # First log the exception
            logger.exception('Exception encountered; stacktrace follows')

            # Sleep the number of seconds for minimum request duration
            sleep(get_sleep_duration(proc_cfg, start_time, dont_sleep))

            archive_log_files(order_id, product_id)

            if server is not None:
                try:
                    status = set_product_error(server,
                                               order_id,
                                               product_id,
                                               processing_location)
                except Exception:
                    logger.exception('Exception encountered; stacktrace'
                                     ' follows')
        finally:
            # Reset back to the base logger
            logger = EspaLogging.get_logger('base')
def process_requests(cron_cfg, proc_cfg, args, queue_priority,
                     request_priority):
    """Retrieves and kicks off processes

    Queries the API service to see if there are any requests that need
    to be processed with the specified type, priority and/or user.  If there
    are, this method builds and executes a hadoop job and updates the status
    for each request through the API service.

    Args:
        cron_cfg (ConfigParser): Configuration for ESPA cron.
        proc_cfg (ConfigParser): Configuration for ESPA processing.
        args (struct): The arguments retrieved from the command line.
        queue_priority (str): The queue to use or None.
        request_priority (str): The request to use or None.

    Returns:
        Nothing is returned.

    Raises:
        Exception(message)
    """

    # Get the logger for this task
    logger = logging.getLogger(LOGGER_NAME)

    # check the number of hadoop jobs and don't do anything if they
    # are over a limit
    job_limit = cron_cfg.getint('hadoop', 'max_jobs')
    cmd = "hadoop job -list|awk '{print $1}'|grep -c job 2>/dev/null"
    try:
        job_count = execute_cmd(cmd)
    except Exception as e:
        errmsg = 'Stdout/Stderr is: 0'
        if errmsg in str(e):
            job_count = 0
        else:
            raise e

    if int(job_count) >= int(job_limit):
        logger.warning('Detected {0} Hadoop jobs running'.format(job_count))
        logger.warning('No additional jobs will be run until job count'
                       ' is below {0}'.format(job_limit))
        return

    rpcurl = proc_cfg.get('processing', 'espa_api')
    server = None

    # Create a server object if the rpcurl seems valid
    if (rpcurl is not None and rpcurl.startswith('http://')
            and len(rpcurl) > 7):

        server = api_interface.api_connect(rpcurl)
    else:
        raise Exception('Missing or invalid environment variable ESPA_API')

    home_dir = os.environ.get('HOME')
    hadoop_executable = os.path.join(home_dir, 'bin/hadoop/bin/hadoop')

    # Verify API server
    if server is None:
        raise Exception('ESPA API did not respond... exiting')

    user = server.get_configuration('landsatds.username')
    if user is None:
        raise Exception('landsatds.username is not defined... exiting')

    password = server.get_configuration('landsatds.password')
    if password is None:
        raise Exception('landsatds.password is not defined... exiting')

    host = server.get_configuration('landsatds.host')
    if host is None:
        raise Exception('landsatds.host is not defined... exiting')

    # Use ondemand_enabled to determine if we should be processing or not
    ondemand_enabled = server.get_configuration('system.ondemand_enabled')

    # Determine the appropriate hadoop queue to use
    hadoop_job_queue = get_queue_name(cron_cfg, queue_priority)

    if ondemand_enabled.lower() != 'true':
        raise Exception('on demand disabled... exiting')

    # Create a partial function to reduce duplication in some of the
    # following code
    proc_cmdenv = partial(gen_cmdenv_from_cfg,
                          cfg=proc_cfg,
                          section='processing')
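
    # gen_cmdenv_from_cfg is defined elsewhere in this codebase; the
    # assumption here is that it renders a single 'NAME=value' string
    # suitable for hadoop's -cmdenv flag, for example (illustrative value):
    #
    #     proc_cmdenv(option='espa_work_dir')  # -> 'ESPA_WORK_DIR=/data/work'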

    try:
        logger.info('Checking for requests to process...')
        requests = server.get_scenes_to_process(int(args.limit), args.user,
                                                request_priority,
                                                list(args.product_types))
        if requests:
            # Figure out the name of the order file
            stamp = datetime.now()
            job_name = ('{0}_{1}_{2}_{3}_{4}_{5}-{6}-espa_job'
                        .format(stamp.month, stamp.day, stamp.year,
                                stamp.hour, stamp.minute, stamp.second,
                                queue_priority))

            logger.info(' '.join([
                'Found requests to process,', 'generating job name:', job_name
            ]))

            job_filename = '{0}.txt'.format(job_name)
            job_filepath = os.path.join('/tmp', job_filename)

            # Create the order file full of all the scenes requested
            with open(job_filepath, 'w+') as espa_fd:
                for request in requests:
                    request['espa_api'] = rpcurl

                    # Log the request before passwords are added
                    line_entry = json.dumps(request)
                    logger.info(line_entry)

                    # Add the usernames and passwords to the options
                    request['options']['source_username'] = user
                    request['options']['destination_username'] = user
                    request['options']['source_pw'] = password
                    request['options']['destination_pw'] = password

                    # Need to refresh since we added credentials that
                    # must not be logged
                    line_entry = json.dumps(request)

                    # Split the jobs using newlines
                    request_line = ''.join([line_entry, '\n'])

                    # Write out the request line
                    espa_fd.write(request_line)

            # Specify the location of the order file on the hdfs
            hdfs_target = os.path.join('requests', job_filename)

            # Define command line to store the job file in hdfs
            hadoop_store_command = [
                hadoop_executable, 'dfs', '-copyFromLocal', job_filepath,
                hdfs_target
            ]
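
            # Joined and run through execute_cmd, this becomes roughly
            # (paths illustrative):
            #
            #     ~/bin/hadoop/bin/hadoop dfs -copyFromLocal \
            #         /tmp/<job_name>.txt requests/<job_name>.txt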

            jars_path = os.path.join(home_dir, 'bin/hadoop/contrib/streaming',
                                     'hadoop-streaming*.jar')

            code_dir = os.path.join(home_dir, 'espa-site/processing')

            # Specify the mapper application
            mapper_path = os.path.join(code_dir, 'ondemand_mapper.py')

            # Define command line to execute the hadoop job
            # Be careful it is possible to have conflicts between module names
            #
            # When Hadoop kicks off a job task, it doesn't set $HOME
            # However matplotlib requires it to be set
            hadoop_run_command = \
                [hadoop_executable, 'jar', jars_path,
                 '-D', ('mapred.task.timeout={0}'
                        .format(cron_cfg.getint('hadoop', 'timeout'))),
                 '-D', 'mapred.reduce.tasks=0',
                 '-D', 'mapred.job.queue.name={0}'.format(hadoop_job_queue),
                 '-D', 'mapred.job.name="{0}"'.format(job_name),
                 '-inputformat', ('org.apache.hadoop.mapred.'
                                  'lib.NLineInputFormat'),
                 '-file', mapper_path,
                 '-file', os.path.join(code_dir, 'api_interface.py'),
                 '-file', os.path.join(code_dir, 'config_utils.py'),
                 '-file', os.path.join(code_dir, 'distribution.py'),
                 '-file', os.path.join(code_dir, 'environment.py'),
                 '-file', os.path.join(code_dir, 'espa_exception.py'),
                 '-file', os.path.join(code_dir, 'initialization.py'),
                 '-file', os.path.join(code_dir, 'landsat_metadata.py'),
                 '-file', os.path.join(code_dir, 'logging_tools.py'),
                 '-file', os.path.join(code_dir, 'parameters.py'),
                 '-file', os.path.join(code_dir, 'processor.py'),
                 '-file', os.path.join(code_dir, 'sensor.py'),
                 '-file', os.path.join(code_dir, 'settings.py'),
                 '-file', os.path.join(code_dir, 'staging.py'),
                 '-file', os.path.join(code_dir, 'statistics.py'),
                 '-file', os.path.join(code_dir, 'transfer.py'),
                 '-file', os.path.join(code_dir, 'utilities.py'),
                 '-file', os.path.join(code_dir, 'warp.py'),
                 '-mapper', mapper_path,
                 '-cmdenv', 'HOME=$HOME',
                 '-cmdenv', proc_cmdenv(option='espa_work_dir'),
                 '-cmdenv', proc_cmdenv(option='espa_distribution_method'),
                 '-cmdenv', proc_cmdenv(option='espa_distribution_dir'),
                 '-cmdenv', proc_cmdenv(option='espa_schema'),
                 '-cmdenv', proc_cmdenv(option='espa_land_mass_polygon'),
                 '-cmdenv', proc_cmdenv(option='espa_api'),
                 '-cmdenv', proc_cmdenv(option='espa_cache_host_list'),
                 '-cmdenv', proc_cmdenv(option='espa_elevation_dir'),
                 '-cmdenv', proc_cmdenv(option='ias_data_dir'),
                 '-cmdenv', proc_cmdenv(option='pythonpath'),
                 '-cmdenv', proc_cmdenv(option='ledaps_aux_dir'),
                 '-cmdenv', proc_cmdenv(option='l8_aux_dir'),
                 '-cmdenv', proc_cmdenv(option='esun'),
                 '-cmdenv', proc_cmdenv(option='lst_aux_dir'),
                 '-cmdenv', proc_cmdenv(option='lst_data_dir'),
                 '-cmdenv', proc_cmdenv(option='modtran_path'),
                 '-cmdenv', proc_cmdenv(option='modtran_data_dir'),
                 '-cmdenv', proc_cmdenv(option='aster_ged_server_name'),
                 '-input', hdfs_target,
                 '-output', hdfs_target + '-out']

            # Define the executables to clean up hdfs
            hadoop_delete_request_command1 = [
                hadoop_executable, 'dfs', '-rmr', hdfs_target
            ]
            hadoop_delete_request_command2 = [
                hadoop_executable, 'dfs', '-rmr', hdfs_target + '-out'
            ]

            logger.info('Storing request file to hdfs...')
            output = ''
            try:
                cmd = ' '.join(hadoop_store_command)
                logger.info('Store cmd:{0}'.format(cmd))

                output = execute_cmd(cmd)
            except Exception:
                msg = 'Error storing files to HDFS... exiting'
                raise Exception(msg)
            finally:
                if len(output) > 0:
                    logger.info(output)

                logger.info('Deleting local request file copy [{0}]'.format(
                    job_filepath))
                os.unlink(job_filepath)

            try:
                # Update the scene list as queued so they don't get pulled
                # down again now that these jobs have been stored in hdfs
                product_list = list()
                for request in requests:
                    product_list.append((request['orderid'], request['scene']))

                    logger.info(
                        'Adding scene:{0} orderid:{1} to queued list'.format(
                            request['scene'], request['orderid']))

                server.queue_products(product_list, 'CDR_ECV cron driver',
                                      job_name)

                logger.info('Running hadoop job...')
                output = ''
                try:
                    cmd = ' '.join(hadoop_run_command)
                    logger.info('Run cmd:{0}'.format(cmd))

                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception('Error running Hadoop job...')
                finally:
                    if len(output) > 0:
                        logger.info(output)

            finally:
                logger.info('Deleting hadoop job request file from hdfs....')
                output = ''
                try:
                    cmd = ' '.join(hadoop_delete_request_command1)
                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception("Error deleting hadoop job request file")
                finally:
                    if len(output) > 0:
                        logger.info(output)

                logger.info('Deleting hadoop job output...')
                output = ''
                try:
                    cmd = ' '.join(hadoop_delete_request_command2)
                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception('Error deleting hadoop job output')
                finally:
                    if len(output) > 0:
                        logger.info(output)

        else:
            logger.info('No requests to process....')

    except api_interface.APIException:
        logger.exception('A protocol error occurred')

    except Exception:
        logger.exception('Error Processing Ondemand Requests')

    finally:
        server = None
def process_requests(cron_cfg, proc_cfg, args,
                     queue_priority, request_priority):
    """Retrieves and kicks off processes

    Queries the API service to see if there are any requests that need
    to be processed with the specified type, priority and/or user.  If there
    are, this method builds and executes a hadoop job and updates the status
    for each request through the API service.

    Args:
        cron_cfg (ConfigParser): Configuration for ESPA cron.
        proc_cfg (ConfigParser): Configuration for ESPA processing.
        args (struct): The arguments retrieved from the command line.
        queue_priority (str): The queue to use or None.
        request_priority (str): The request to use or None.

    Returns:
        Nothing is returned.

    Raises:
        Exception(message)
    """

    # Get the logger for this task
    logger = logging.getLogger(LOGGER_NAME)

    # Define path to hadoop commandline executables
    home_dir = os.environ.get('HOME')
    yarn_executable = os.path.join(home_dir, 'bin/hadoop/bin/yarn')
    hdfs_executable = os.path.join(home_dir, 'bin/hadoop/bin/hdfs')
    jars_path = os.path.join(home_dir,
                             'bin/hadoop/share/hadoop/tools/lib/',
                             'hadoop-streaming-*.jar')

    # check the number of hadoop jobs and don't do anything if they
    # are over a limit
    job_limit = cron_cfg.getint('hadoop', 'max_jobs')
    yarn_running_apps_command = [yarn_executable, "application", "-list"]
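
    # The parsing below assumes the `yarn application -list` output ends
    # with a summary line of the form (illustrative):
    #
    #     Total number of applications (...):3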

    try:
        cmd = ' '.join(yarn_running_apps_command)
        app_states = execute_cmd(cmd)
        # Get "total applications: N" output line from YARN
        running_line = [l for l in app_states.split('\n')
                        if 'Total number of applications' in l].pop()
        job_count = running_line.split(':')[-1]
    except Exception as e:
        errmsg = 'Stdout/Stderr is: 0'
        if errmsg in str(e):
            job_count = 0
        else:
            raise e

    if int(job_count) >= int(job_limit):
        logger.warning('Detected {0} Hadoop jobs running'.format(job_count))
        logger.warning('No additional jobs will be run until job count'
                       ' is below {0}'.format(job_limit))
                    ' is below {0}'.format(job_limit))
        return

    rpcurl = proc_cfg.get('processing', 'espa_api')
    server = None

    # Create a server object if the rpcurl seems valid
    if (rpcurl is not None and
            rpcurl.startswith('http://') and
            len(rpcurl) > 7):

        server = api_interface.api_connect(rpcurl)
    else:
        raise Exception('Missing or invalid environment variable ESPA_API')

    # Verify API server
    if server is None:
        raise Exception('ESPA API did not respond... exiting')

    user = server.get_configuration('landsatds.username')
    if user is None:
        raise Exception('landsatds.username is not defined... exiting')

    password = server.get_configuration('landsatds.password')
    if password is None:
        raise Exception('landsatds.password is not defined... exiting')

    host = server.get_configuration('landsatds.host')
    if host is None:
        raise Exception('landsatds.host is not defined... exiting')

    # Use ondemand_enabled to determine if we should be processing or not
    ondemand_enabled = server.get_configuration('system.ondemand_enabled')

    # Determine the appropriate hadoop queue to use
    hadoop_job_queue = get_queue_name(cron_cfg, queue_priority)

    if ondemand_enabled.lower() != 'true':
        raise Exception('on demand disabled... exiting')

    # Create a partial function to reduce duplication in some of the
    # following code
    proc_cmdenv = partial(gen_cmdenv_from_cfg,
                          cfg=proc_cfg, section='processing')

    try:
        logger.info('Checking for requests to process...')
        requests = server.get_scenes_to_process(int(args.limit), args.user,
                                                request_priority,
                                                list(args.product_types))
        if requests:
            # Figure out the name of the order file
            stamp = datetime.now()
            job_name = ('{0:%Y-%m-%d-%H-%M-%S}-{1}-espa_job'
                        .format(stamp, queue_priority))

            logger.info(' '.join(['Found requests to process,',
                                  'generating job name:', job_name]))

            job_filename = '{0}.txt'.format(job_name)
            job_filepath = os.path.join('/tmp', job_filename)

            # Create the order file full of all the scenes requested
            with open(job_filepath, 'w+') as espa_fd:
                for request in requests:
                    request['espa_api'] = rpcurl

                    # Log the request before passwords are added
                    line_entry = json.dumps(request)
                    logger.info(line_entry)

                    # Add the usernames and passwords to the options
                    request['options']['source_username'] = user
                    request['options']['destination_username'] = user
                    request['options']['source_pw'] = password
                    request['options']['destination_pw'] = password

                    # Need to refresh since we added credentials that
                    # must not be logged
                    line_entry = json.dumps(request)

                    # Split the jobs using newlines
                    request_line = ''.join([line_entry, '\n'])

                    # Write out the request line
                    espa_fd.write(request_line)

            # Specify the location of the order file on the hdfs
            hdfs_target = os.path.join('requests', job_filename)

            # Define command line to store the job file in hdfs
            hadoop_store_command = [hdfs_executable, 'dfs', '-put',
                                    job_filepath, hdfs_target]

            # Specify the mapper application
            code_dir = os.path.join(home_dir, 'espa-site/processing')
            mapper_path = 'processing/ondemand_mapper.py'

            # Define command line to execute the hadoop job
            # Be careful it is possible to have conflicts between module names
            #
            # When Hadoop kicks off a job task, it doesn't set $HOME
            # However matplotlib requires it to be set
            hadoop_run_command = \
                [yarn_executable, 'jar', jars_path,
                 '-D', ('mapred.task.timeout={0}'
                        .format(cron_cfg.getint('hadoop', 'timeout'))),
                 '-D', 'mapred.reduce.tasks=0',
                 '-D', 'mapred.job.queue.name={0}'.format(hadoop_job_queue),
                 '-D', 'mapred.job.name="{0}"'.format(job_name),
                 '-files', code_dir,
                 '-mapper', mapper_path,
                 '-input', hdfs_target,
                 '-inputformat', 'org.apache.hadoop.mapred.lib.NLineInputFormat',
                 '-cmdenv', 'HOME={0}'.format(home_dir),
                 '-output', hdfs_target + '-out']

            # Define the executables to clean up hdfs
            hadoop_delete_request_command1 = [hdfs_executable, 'dfs',
                                              '-rm', '-r', hdfs_target]
            hadoop_delete_request_command2 = [hdfs_executable, 'dfs',
                                              '-rm', '-r', hdfs_target + '-out']

            logger.info('Storing request file to hdfs...')
            output = ''
            try:
                cmd = ' '.join(hadoop_store_command)
                logger.info('Store cmd:{0}'.format(cmd))

                output = execute_cmd(cmd)
            except Exception:
                msg = 'Error storing files to HDFS... exiting'
                raise Exception(msg)
            finally:
                if len(output) > 0:
                    logger.info(output)

                logger.info('Deleting local request file copy [{0}]'
                            .format(job_filepath))
                os.unlink(job_filepath)

            try:
                # Update the scene list as queued so they don't get pulled
                # down again now that these jobs have been stored in hdfs
                product_list = list()
                for request in requests:
                    product_list.append((request['orderid'],
                                         request['scene']))

                    logger.info('Adding scene:{0} orderid:{1} to queued list'
                                .format(request['scene'], request['orderid']))

                server.queue_products(product_list, 'CDR_ECV cron driver',
                                      job_name)

                logger.info('Running hadoop job...')
                output = ''
                try:
                    cmd = ' '.join(hadoop_run_command)
                    logger.info('Run cmd:{0}'.format(cmd))

                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception('Error running Hadoop job...')
                finally:
                    if len(output) > 0:
                        logger.info(output)

            finally:
                logger.info('Deleting hadoop job request file from hdfs....')
                output = ''
                try:
                    cmd = ' '.join(hadoop_delete_request_command1)
                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception("Error deleting hadoop job request file")
                finally:
                    if len(output) > 0:
                        logger.info(output)

                logger.info('Deleting hadoop job output...')
                output = ''
                try:
                    cmd = ' '.join(hadoop_delete_request_command2)
                    output = execute_cmd(cmd)
                except Exception:
                    logger.exception('Error deleting hadoop job output')
                finally:
                    if len(output) > 0:
                        logger.info(output)

        else:
            logger.info('No requests to process....')

    except api_interface.APIException:
        logger.exception('A protocol error occurred')

    except Exception:
        logger.exception('Error Processing Ondemand Requests')

    finally:
        server = None