def process_requests(args, logger_name, queue_priority, request_priority):
    '''
    Description:
      Queries the xmlrpc service to see if there are any requests that need
      to be processed with the specified type, priority and/or user.  If there
      are, this method builds and executes a hadoop job and updates the status
      for each request through the xmlrpc service.
    '''

    # Get the logger for this task
    logger = EspaLogging.get_logger(logger_name)


    # Check the number of running hadoop jobs and don't do anything if
    # they are over the limit
    job_limit = settings.HADOOP_MAX_JOBS
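    # Count the hadoop jobs currently on the cluster: list them, keep the
    # first column and count the lines that contain a job id.  grep exits
    # non-zero when nothing matches, which the except clause below maps
    # back to a job count of 0.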
    cmd = "hadoop job -list|awk '{print $1}'|grep -c job 2>/dev/null"
    try:
        job_count = utilities.execute_cmd(cmd)
    except Exception, e:
        errmsg = 'Stdout/Stderr is: 0'
        if errmsg in e.message:
            job_count = 0
        else:
            raise e
Example #3
def process_requests(args, logger_name, queue_priority, request_priority):
    '''
    Description:
      Queries the xmlrpc service to see if there are any requests that need
      to be processed with the specified type, priority and/or user.  If there
      are, this method builds and executes a hadoop job and updates the status
      for each request through the xmlrpc service.
    '''

    # Get the logger for this task
    logger = EspaLogging.get_logger(logger_name)

    rpcurl = os.environ.get('ESPA_XMLRPC')
    server = None

    # Create a server object if the rpcurl seems valid
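    # (a length greater than 7 means there is something after the
    # 'http://' prefix)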
    if (rpcurl is not None and rpcurl.startswith('http://')
            and len(rpcurl) > 7):

        server = xmlrpclib.ServerProxy(rpcurl, allow_none=True)
    else:
        raise Exception("Missing or invalid environment variable ESPA_XMLRPC")

    home_dir = os.environ.get('HOME')
    hadoop_executable = "%s/bin/hadoop/bin/hadoop" % home_dir

    # Verify xmlrpc server
    if server is None:
        msg = "xmlrpc server was None... exiting"
        raise Exception(msg)

    user = server.get_configuration('landsatds.username')
    if len(user) == 0:
        msg = "landsatds.username is not defined... exiting"
        raise Exception(msg)

    pw = urllib.quote(server.get_configuration('landsatds.password'))
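    # The password is URL-quoted, presumably because it ends up embedded in
    # transfer URLs by the downstream processing code (inferred from the
    # urllib.quote call, not documented behaviour)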
    if len(pw) == 0:
        msg = "landsatds.password is not defined... exiting"
        raise Exception(msg)

    host = server.get_configuration('landsatds.host')
    if len(host) == 0:
        msg = "landsatds.host is not defined... exiting"
        raise Exception(msg)

    # Use ondemand_enabled to determine if we should be processing or not
    ondemand_enabled = server.get_configuration('ondemand_enabled')

    # Determine the appropriate hadoop queue to use
    hadoop_job_queue = settings.HADOOP_QUEUE_MAPPING[queue_priority]
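    # (an unrecognized queue_priority will raise a KeyError here)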

    if not ondemand_enabled.lower() == 'true':
        raise Exception("on demand disabled... exiting")

    try:
        logger.info("Checking for requests to process...")
        requests = server.get_scenes_to_process(int(args.limit), args.user,
                                                request_priority,
                                                list(args.product_types))
        if requests:
            # Figure out the name of the order file
            stamp = datetime.now()
            job_name = ('%s_%s_%s_%s_%s_%s-%s-espa_job'
                        % (str(stamp.month), str(stamp.day),
                           str(stamp.year), str(stamp.hour),
                           str(stamp.minute), str(stamp.second),
                           str(queue_priority)))

            logger.info(' '.join(["Found requests to process,",
                                  "generating job name:", job_name]))

            job_filename = '%s%s' % (job_name, '.txt')
            job_filepath = os.path.join('/tmp', job_filename)

            # Create the order file full of all the scenes requested
            with open(job_filepath, 'w+') as espa_fd:
                for request in requests:
                    (orderid, options) = (request['orderid'],
                                          request['options'])

                    request['xmlrpcurl'] = rpcurl

                    # Log the requested options before passwords are added
                    line_entry = json.dumps(request)
                    logger.info(line_entry)

                    # Add the usernames and passwords to the options
                    options['source_username'] = user
                    options['destination_username'] = user
                    options['source_pw'] = pw
                    options['destination_pw'] = pw

                    request['options'] = options

                    # Need to refresh since we added password stuff that
                    # could not be logged
                    line_entry = json.dumps(request)

                    # Pad the entry so hadoop will properly split the jobs
                    filler_count = (settings.ORDER_BUFFER_LENGTH -
                                    len(line_entry))
                    request_line = ''.join([line_entry,
                                            ('#' * filler_count), '\n'])
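                    # (fixed-width lines presumably keep the hadoop input
                    # splits aligned with whole requests; inferred from the
                    # padding comment above)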

                    # Write out the request line
                    espa_fd.write(request_line)
                # END - for scene
            # END - with espa_fd

            # Specify the location of the order file on the hdfs
            hdfs_target = 'requests/%s' % job_filename

            # Specify the mapper application
            mapper = "ondemand_mapper.py"

            # Define command line to store the job file in hdfs
            hadoop_store_command = [hadoop_executable, 'dfs', '-copyFromLocal',
                                    job_filepath, hdfs_target]

            jars = os.path.join(home_dir, 'bin/hadoop/contrib/streaming',
                                'hadoop-streaming*.jar')
            # Define command line to execute the hadoop job
            # Be careful it is possible to have conflicts between module names
            hadoop_run_command = \
                [hadoop_executable, 'jar', jars,
                 '-D', 'mapred.task.timeout=%s' % settings.HADOOP_TIMEOUT,
                 '-D', 'mapred.reduce.tasks=0',
                 '-D', 'mapred.job.queue.name=%s' % hadoop_job_queue,
                 '-D', 'mapred.job.name="%s"' % job_name,
                 '-file', '%s/espa-site/processing/%s' % (home_dir, mapper),
                 '-file', '%s/espa-site/processing/processor.py' % home_dir,
                 '-file', '%s/espa-site/processing/browse.py' % home_dir,
                 '-file', '%s/espa-site/processing/environment.py' % home_dir,
                 '-file', ('%s/espa-site/processing/initialization.py'
                           % home_dir),
                 '-file', '%s/espa-site/processing/distribution.py' % home_dir,
                 '-file', ('%s/espa-site/processing/espa_exception.py'
                           % home_dir),
                 '-file', '%s/espa-site/processing/metadata.py' % home_dir,
                 '-file', '%s/espa-site/processing/parameters.py' % home_dir,
                 '-file', '%s/espa-site/processing/solr.py' % home_dir,
                 '-file', '%s/espa-site/processing/staging.py' % home_dir,
                 '-file', '%s/espa-site/processing/statistics.py' % home_dir,
                 '-file', '%s/espa-site/processing/transfer.py' % home_dir,
                 '-file', '%s/espa-site/processing/warp.py' % home_dir,
                 '-file', ('%s/espa-site/espa_common/logger_factory.py'
                           % home_dir),
                 '-file', '%s/espa-site/espa_common/sensor.py' % home_dir,
                 '-file', '%s/espa-site/espa_common/settings.py' % home_dir,
                 '-file', '%s/espa-site/espa_common/utilities.py' % home_dir,
                 '-mapper', '%s/espa-site/processing/%s' % (home_dir, mapper),
                 '-cmdenv', 'ESPA_WORK_DIR=$ESPA_WORK_DIR',
                 '-cmdenv', 'HOME=$HOME',
                 '-cmdenv', 'USER=$USER',
                 '-cmdenv', 'LEDAPS_AUX_DIR=$LEDAPS_AUX_DIR',
                 '-cmdenv', 'L8_AUX_DIR=$L8_AUX_DIR',
                 '-cmdenv', 'ESUN=$ESUN',
                 '-input', hdfs_target,
                 '-output', hdfs_target + '-out']
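            # The streaming job above ships the mapper and its supporting
            # modules to the task nodes with '-file', runs map-only
            # (mapred.reduce.tasks=0) and forwards the required environment
            # variables with '-cmdenv'.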

            # Define the executables to clean up hdfs
            hadoop_delete_request_command1 = [hadoop_executable, 'dfs',
                                              '-rmr', hdfs_target]
            hadoop_delete_request_command2 = [hadoop_executable, 'dfs',
                                              '-rmr', hdfs_target + '-out']

            # ----------------------------------------------------------------
            logger.info("Storing request file to hdfs...")
            output = ''
            try:
                cmd = ' '.join(hadoop_store_command)
                logger.info("Store cmd:%s" % cmd)

                output = utilities.execute_cmd(cmd)
            except Exception:
                msg = "Error storing files to HDFS... exiting"
                raise Exception(msg)
            finally:
                if len(output) > 0:
                    logger.info(output)

                logger.info("Deleting local request file copy [%s]"
                            % job_filepath)
                os.unlink(job_filepath)

            try:
                # ------------------------------------------------------------
                # Update the scene list as queued so they don't get pulled
                # down again now that these jobs have been stored in hdfs
                product_list = list()
                for request in requests:
                    orderid = request['orderid']
                    sceneid = request['scene']
                    product_list.append((orderid, sceneid))

                    logger.info("Adding scene:%s orderid:%s to queued list"
                                % (sceneid, orderid))

                server.queue_products(product_list, 'CDR_ECV cron driver',
                                      job_name)

                # ------------------------------------------------------------
                logger.info("Running hadoop job...")
                output = ''
                try:
                    cmd = ' '.join(hadoop_run_command)
                    logger.info("Run cmd:%s" % cmd)

                    output = utilities.execute_cmd(cmd)
                except Exception:
                    logger.exception("Error running Hadoop job...")
                finally:
                    if len(output) > 0:
                        logger.info(output)
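
Below is a minimal sketch, not taken from either example above, of how a cron
driver might invoke process_requests.  It assumes an argparse front end that
supplies the limit, user and product_types attributes read from args, and that
EspaLogging has already been configured; the logger name and priority values
are placeholders.

import argparse


def main():
    # Hypothetical driver: only the attribute names (limit, user,
    # product_types) are taken from the code above, everything else is assumed
    parser = argparse.ArgumentParser(description='Submit ESPA hadoop jobs')
    parser.add_argument('--limit', default='500',
                        help='maximum number of requests to pull')
    parser.add_argument('--user', default=None,
                        help='only process requests for this user')
    parser.add_argument('--product-types', dest='product_types', nargs='*',
                        default=[], help='product types to process')
    args = parser.parse_args()

    # Placeholder logger/priority values; the real cron driver would pull
    # these from its own configuration
    process_requests(args,
                     logger_name='espa.cron',
                     queue_priority='all',
                     request_priority='all')


if __name__ == '__main__':
    main()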