def _preprocess(self, return_params, kwargs):
    prefix = 's3://'
    aws_storage_bucket_name = auxiliary.read_option(self.config, 'aws', 'aws_storage_bucket_name')
    aws_storage_head = os.path.join(prefix, aws_storage_bucket_name)
    s3_app_input = auxiliary.read_option(self.config, 'preprocessor', 's3_app_input')
    s3_app_output = auxiliary.read_option(self.config, 'preprocessor', 's3_app_output')
    s3_app_scripts = auxiliary.read_option(self.config, 'preprocessor', 's3_app_scripts')
    s3cfg_path = os.path.join(self.CLAUDE_ROOT, auxiliary.read_option(self.config, 'preprocessor', 's3cfg'))

    group_id = kwargs['group_id']
    input_param = kwargs['input_param']

    # Producer for status messages sent back to the caller's return queue.
    producer = messaging.MessageProducer(
        messaging.buildRabbitMQConnectionLink(address=return_params['return_address']),
        return_params['return_queue'],
    )

    scripts_dir = scripts.__path__[0]

    with auxiliary.TemporaryDirectory() as tmp_dir:
        # Download the input archive from S3 into the temporary directory.
        input_s3_path_full = os.path.join(aws_storage_head, kwargs['input_file'])
        input_arch_name = os.path.basename(input_s3_path_full)
        input_arch_path = os.path.join(tmp_dir, input_arch_name)
        s3tools.s3cmdGET(s3cfg_path, input_s3_path_full, input_arch_path)

        # Split the downloaded archive into per-application archives ready for processing.
        ready_archs = self._handle_local_arch(input_arch_path)

        for arch in ready_archs:
            app_type = AppParamsProvider(arch['app_type'], input_param)

            # Upload the application-specific run script.
            script_name = app_type.getAppScriptName()
            script_path = os.path.join(scripts_dir, script_name)
            script_s3_path_full = os.path.join(aws_storage_head, s3_app_scripts, script_name)
            s3tools.s3cmdPUT(s3cfg_path, script_path, script_s3_path_full)

            # Upload the application input archive.
            app_input_path = arch['arch_path']
            app_input_name = os.path.basename(app_input_path)
            app_input_s3_path = os.path.join(s3_app_input, app_input_name)
            app_input_s3_path_full = os.path.join(aws_storage_head, app_input_s3_path)
            s3tools.s3cmdPUT(s3cfg_path, app_input_path, app_input_s3_path_full)

            # Location where the application is expected to place its output archive.
            app_output_s3_path = os.path.join(s3_app_output, 'output_' + app_input_name)
            app_output_s3_path_full = os.path.join(aws_storage_head, app_output_s3_path)

            app_params = {
                'app_input_file': app_input_s3_path_full,
                'app_output_file': app_output_s3_path_full,
                'app_script': script_s3_path_full,
            }
            app_params.update(app_type.getAppSpecificParams())

            # Announce that this archive has been preprocessed.
            message = messaging.createMessage(
                'preprocessed',
                group_id=group_id,
                app_type=arch['app_type'],
                app_name=arch['app_name'],
                app_input_file=app_input_s3_path,
                app_output_file=app_output_s3_path,
                app_params=app_params,
            )
            producer.publish(message)

    # All archives handled: report completion for this input.
    message = messaging.createMessage(
        'preprocessing_completed',
        input_id=kwargs['input_id'],
        group_id=group_id,
    )
    producer.publish(message)
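# For illustration only (hypothetical bucket and prefixes): with
# aws_storage_bucket_name = 'my-bucket', s3_app_input = 'input',
# s3_app_output = 'output', s3_app_scripts = 'scripts' and a per-application
# archive named 'case1.zip', each loop iteration above uploads
#   s3://my-bucket/scripts/<script_name>
#   s3://my-bucket/input/case1.zip
# and advertises s3://my-bucket/output/output_case1.zip as the location the
# application is expected to fill with its results.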
def main(config):
    ts_start = datetime.now()
    logger = auxiliary.getLogger()

    # Read the run configuration.
    s3cfg = auxiliary.read_option(config, 'general', 's3cfg')
    app_input_s3_path_full = auxiliary.read_option(config, 'general', 'app_input_file')
    app_output_s3_path_full = auxiliary.read_option(config, 'general', 'app_output_file')
    licence = auxiliary.read_option(config, 'general', 'licence')
    grok = auxiliary.read_option(config, 'general', 'grok')
    hgs = auxiliary.read_option(config, 'general', 'hgs')

    current_dir = os.getcwd()
    app_input_name = os.path.basename(app_input_s3_path_full)
    app_input_path = os.path.join(current_dir, app_input_name)
    app_output_name = os.path.basename(app_output_s3_path_full)
    app_output_path = os.path.join(current_dir, app_output_name)

    logger.info('Starting %s script...\n' % __name__)
    logger.info('Configuration options:')
    logger.info('Current directory: %s' % current_dir)
    logger.info('Input archive location: %s' % app_input_s3_path_full)
    logger.info('Output archive location: %s' % app_output_s3_path_full)
    logger.info('GROK location: %s' % grok)
    logger.info('HGS location: %s\n' % hgs)

    logger.info('Downloading input archive')
    s3tools.s3cmdGET(s3cfg, app_input_s3_path_full, app_input_path, shellOutput=False)

    # Locate the HGS input inside the archive and derive the working directories.
    hgs_file = auxiliary.seekZipForElement(app_input_path, AppTypes.HGS)[0]
    working_dir = os.path.join(current_dir, os.path.split(hgs_file)[0])
    root_dir = os.path.join(current_dir, auxiliary.getRootDir(hgs_file))
    logger.info('Working directory: %s' % working_dir)

    logger.info('Extracting input archive...')
    auxiliary.extractZip(app_input_path, current_dir)
    os.chdir(working_dir)

    # Run GROK, then HGS; merge stderr into stdout and tee each run into its log file.
    cmd = '%s 2>&1 | tee -a %s' % (grok, 'grok.log')
    logger.info('Executing GROK: %s' % cmd)
    auxiliary.execute(cmd)

    cmd = '%s 2>&1 | tee -a %s' % (hgs, 'hgs.log')
    logger.info('Executing HGS: %s' % cmd)
    auxiliary.execute(cmd)

    os.chdir(current_dir)

    logger.info('Creating output archive...')
    # Drop the 4-character extension (e.g. '.zip') from the target archive name.
    auxiliary.createZip(app_output_path[:-4], root_dir)

    logger.info('Uploading output archive')
    #while not s3tools.s3cmdLS(s3cfg, app_output_s3_path_full, shellOutput=False):
    s3tools.s3cmdPUT(s3cfg, app_output_path, app_output_s3_path_full, shellOutput=False)
    logger.info('Done.')

    ts_end = datetime.now()
    ts_dif = ts_end - ts_start
    print('GROK & HGS execution + cloud I/O operations took: %s seconds' % ts_dif.total_seconds())
    print('Executed on machine: %s' % messaging.getIPAdreess())
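# A minimal entry-point sketch, assuming `config` is a ConfigParser-style object
# whose [general] section carries the options read above (s3cfg, app_input_file,
# app_output_file, licence, grok, hgs). The config file name and command-line
# handling are hypothetical; the actual construction of `config` is project-specific.
if __name__ == '__main__':
    import sys
    import ConfigParser  # 'configparser' on Python 3

    config = ConfigParser.ConfigParser()
    # Use the first command-line argument as the config path, or a default name.
    config.read(sys.argv[1] if len(sys.argv) > 1 else 'hgs_worker.cfg')
    main(config)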