Exemple #1
0
 def _preprocess(self, return_params, kwargs):
     """Fetch an input archive from S3, stage every resulting app archive and announce it.

     The input archive is retrieved into a temporary directory and handed to
     ``self._handle_local_arch``. For each archive description it returns, the
     matching application script and the archive itself are uploaded to S3 and
     a ``'preprocessed'`` message is published. A final
     ``'preprocessing_completed'`` message is published once all archives
     have been handled.

     Args:
         return_params: mapping with ``'return_address'`` (used to build the
             RabbitMQ connection link) and ``'return_queue'`` (passed to the
             message producer).
         kwargs: plain mapping (not ``**kwargs``) with the keys
             ``'group_id'``, ``'input_param'``, ``'input_file'`` (location of
             the input archive relative to the storage bucket) and
             ``'input_id'``.

     Raises:
         KeyError: if one of the keys listed above is missing.
     """
     # S3 locations are URL-like and always '/'-separated, so they are built
     # with posixpath; os.path would use the platform separator instead.
     import posixpath

     aws_storage_bucket_name = auxiliary.read_option(self.config, 'aws', 'aws_storage_bucket_name')
     aws_storage_head = posixpath.join('s3://', aws_storage_bucket_name)

     s3_app_input = auxiliary.read_option(self.config, 'preprocessor', 's3_app_input')
     s3_app_output = auxiliary.read_option(self.config, 'preprocessor', 's3_app_output')
     s3_app_scripts = auxiliary.read_option(self.config, 'preprocessor', 's3_app_scripts')
     # Local file system path, hence os.path.
     s3cfg_path = os.path.join(self.CLAUDE_ROOT, auxiliary.read_option(self.config, 'preprocessor', 's3cfg'))

     group_id = kwargs['group_id']
     input_param = kwargs['input_param']

     producer = messaging.MessageProducer(
         messaging.buildRabbitMQConnectionLink(address=return_params['return_address']),
         return_params['return_queue'],
     )
     # Directory of the `scripts` package; application scripts are uploaded from here.
     scripts_dir = scripts.__path__[0]

     with auxiliary.TemporaryDirectory() as tmp_dir:
         # Retrieve the input archive next to nothing else, under its own name.
         input_s3_path_full = posixpath.join(aws_storage_head, kwargs['input_file'])
         input_arch_name = os.path.basename(input_s3_path_full)
         input_arch_path = os.path.join(tmp_dir, input_arch_name)
         s3tools.s3cmdGET(s3cfg_path, input_s3_path_full, input_arch_path)

         # Each entry is indexed below by 'app_type', 'app_name' and 'arch_path'.
         ready_archs = self._handle_local_arch(input_arch_path)

         for arch in ready_archs:
             app_type = AppParamsProvider(arch['app_type'], input_param)

             # Upload the script that belongs to this application type.
             script_name = app_type.getAppScriptName()
             script_path = os.path.join(scripts_dir, script_name)
             script_s3_path_full = posixpath.join(aws_storage_head, s3_app_scripts, script_name)
             s3tools.s3cmdPUT(s3cfg_path, script_path, script_s3_path_full)

             # Upload the application input archive.
             app_input_path = arch['arch_path']
             app_input_name = os.path.basename(app_input_path)
             app_input_s3_path = posixpath.join(s3_app_input, app_input_name)
             app_input_s3_path_full = posixpath.join(aws_storage_head, app_input_s3_path)
             s3tools.s3cmdPUT(s3cfg_path, app_input_path, app_input_s3_path_full)

             # The output location is only computed here; nothing is uploaded to it.
             app_output_s3_path = posixpath.join(s3_app_output, 'output_' + app_input_name)
             app_output_s3_path_full = posixpath.join(aws_storage_head, app_output_s3_path)

             # app_params carries full s3:// locations, whereas the message
             # fields below carry the bucket-relative ones.
             app_params = {
                 'app_input_file': app_input_s3_path_full,
                 'app_output_file': app_output_s3_path_full,
                 'app_script': script_s3_path_full,
             }
             app_params.update(app_type.getAppSpecificParams())

             message = messaging.createMessage(
                 'preprocessed',
                 group_id=group_id,
                 app_type=arch['app_type'],
                 app_name=arch['app_name'],
                 app_input_file=app_input_s3_path,
                 app_output_file=app_output_s3_path,
                 app_params=app_params,
             )
             producer.publish(message)

         # Tell the requester that the whole input has been processed.
         message = messaging.createMessage(
             'preprocessing_completed',
             input_id=kwargs['input_id'],
             group_id=group_id,
         )
         producer.publish(message)
Exemple #2
0
def main(config):
    """Run GROK and then HGS on an input archive from S3 and upload the zipped result.

    Steps: read the options from the ``'general'`` section of ``config``,
    retrieve the input archive into the current directory, extract it, run
    the GROK and HGS commands inside the directory that contains the HGS
    element found in the archive, zip the archive's root directory and
    upload it to the configured output location. Elapsed time and the
    machine address are printed at the end.

    Args:
        config: configuration object understood by ``auxiliary.read_option``.
            The options read are ``s3cfg``, ``app_input_file``,
            ``app_output_file``, ``licence``, ``grok`` and ``hgs``.
    """
    ts_start = datetime.now()
    logger = auxiliary.getLogger()

    s3cfg = auxiliary.read_option(config, 'general', 's3cfg')

    app_input_s3_path_full = auxiliary.read_option(config, 'general', 'app_input_file')
    app_output_s3_path_full = auxiliary.read_option(config, 'general', 'app_output_file')
    # Read but not used further in this function; the lookup is kept so that
    # whatever read_option does for this option still happens.
    licence = auxiliary.read_option(config, 'general', 'licence')
    grok = auxiliary.read_option(config, 'general', 'grok')
    hgs = auxiliary.read_option(config, 'general', 'hgs')

    # Both archives live in the current directory under their remote base names.
    current_dir = os.getcwd()
    app_input_name = os.path.basename(app_input_s3_path_full)
    app_input_path = os.path.join(current_dir, app_input_name)
    app_output_name = os.path.basename(app_output_s3_path_full)
    app_output_path = os.path.join(current_dir, app_output_name)

    logger.info('Starting %s script...\n' % (__name__))
    logger.info('Configuration options:')
    logger.info('Current directory: %s'% (current_dir))
    logger.info('Input archive location: %s'% (app_input_s3_path_full))
    logger.info('Output archive location: %s'% (app_output_s3_path_full))
    logger.info('GROK location: %s'% (grok))
    logger.info('HGS location: %s\n'% (hgs))

    logger.info('Downloading input archive')
    s3tools.s3cmdGET(s3cfg, app_input_s3_path_full, app_input_path, shellOutput=False)

    # The first HGS element found in the archive decides where the tools run.
    hgs_file = auxiliary.seekZipForElement(app_input_path, AppTypes.HGS)[0]
    working_dir = os.path.join(current_dir, os.path.split(hgs_file)[0])
    root_dir = os.path.join(current_dir, auxiliary.getRootDir(hgs_file))
    logger.info('Working directory: %s'% (working_dir))

    logger.info('Extracting input archive...')
    auxiliary.extractZip(app_input_path, current_dir)

    os.chdir(working_dir)
    try:
        # NOTE(review): '1>&1' redirects stdout to itself and is a no-op;
        # '2>&1' may have been intended so that stderr reaches the log too.
        # Left unchanged pending confirmation.
        cmd = '%s 1>&1 | tee -a %s' %(grok, 'grok.log')
        logger.info('Executing GROK: %s' % (cmd))
        auxiliary.execute(cmd)

        cmd = '%s 1>&1 | tee -a %s' %(hgs, 'hgs.log')
        logger.info('Executing HGS: %s' % (cmd))
        auxiliary.execute(cmd)
    finally:
        # Always go back, so a failing tool does not leave the process in
        # the working directory.
        os.chdir(current_dir)

    logger.info('Creating output archive...')
    # Drops the last four characters of the output path, presumably a '.zip'
    # suffix that createZip adds back. TODO confirm.
    auxiliary.createZip(app_output_path[:-4], root_dir)

    logger.info('Uploading output archive')
    s3tools.s3cmdPUT(s3cfg, app_output_path, app_output_s3_path_full, shellOutput=False)

    logger.info('Done.')

    ts_end = datetime.now()
    ts_dif = ts_end - ts_start
    # Single-argument print(...) prints the same text on Python 2 and 3.
    print('GROK & HGS execution + claud I/O operations took: %s seconds' % ts_dif.total_seconds())
    print('Executed on machine: %s' % messaging.getIPAdreess())