def __call__(self, processing, transform, input_collection, output_collection):
    try:
        processing_metadata = processing['processing_metadata']
        if 'rule_id' in processing_metadata:
            # A rule was already submitted for this processing; just report Submitted.
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted}
        else:
            if 'rule_submitter' not in self.plugins:
                raise exceptions.AgentPluginError('Plugin rule_submitter is required')
            rule_id = self.plugins['rule_submitter'](processing, transform, input_collection)
            processing_metadata['rule_id'] = rule_id
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted,
                   'processing_metadata': processing_metadata}
        return ret
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    try:
        processing_metadata = processing['processing_metadata']
        rule_id = processing_metadata['rule_id']
        if 'rule_poller' not in self.plugins:
            raise exceptions.AgentPluginError('Plugin rule_poller is required')
        rule, replicases_status = self.plugins['rule_poller'](rule_id)

        updated_files = []
        processing_updates = {}
        if replicases_status:
            file_status_statistics = {}
            for file in output_contents:
                file_key = '%s:%s' % (file['scope'], file['name'])
                if file_key in replicases_status:
                    new_file_status = replicases_status[file_key]
                    if new_file_status != file['status']:
                        file['status'] = new_file_status
                        updated_file = {'content_id': file['content_id'],
                                        'status': new_file_status,
                                        'scope': file['scope'],
                                        'name': file['name'],
                                        'path': None}
                        updated_files.append(updated_file)
                if file['status'] not in file_status_statistics:
                    file_status_statistics[file['status']] = 0
                file_status_statistics[file['status']] += 1

            # Default to Running; promote only when all files agree on one terminal status.
            processing_status = ProcessingStatus.Running
            file_status_keys = list(file_status_statistics.keys())
            if len(file_status_keys) == 1:
                if file_status_keys == [ContentStatus.Available] and rule['state'] == 'OK':
                    processing_status = ProcessingStatus.Finished
                elif file_status_keys == [ContentStatus.Failed]:
                    processing_status = ProcessingStatus.Failed

            file_statusvalue_statistics = {}
            for key in file_status_statistics:
                file_statusvalue_statistics[key.name] = file_status_statistics[key]
            processing_metadata['content_status_statistics'] = file_statusvalue_statistics

            processing_updates = {'status': processing_status,
                                  'processing_metadata': processing_metadata}
        return {'updated_files': updated_files, 'processing_updates': processing_updates}
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
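# --- Illustration (not part of the plugin) ---------------------------------
# A hedged sketch of the replicases_status mapping consumed above. The shape
# is inferred from how it is indexed: keys are 'scope:name' strings and
# values are ContentStatus members returned by the rule_poller plugin.
# The scope and file names below are hypothetical.
example_replicases_status = {
    'hpo:file_0001.root': ContentStatus.Available,  # replica arrived at the destination RSE
    'hpo:file_0002.root': ContentStatus.New,        # still being replicated
}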
def __call__(self, coll_scope, coll_name, account=None, lifetime=7 * 24 * 3600, rse=None, files=None):
    try:
        # Avoid a mutable default argument for files.
        files = files if files is not None else []
        try:
            self.client.get_did(scope=coll_scope, name=coll_name)
        except DataIdentifierNotFound:
            self.client.add_dataset(scope=coll_scope, name=coll_name,
                                    rules=[{'account': account,
                                            'copies': 1,
                                            'rse_expression': rse,
                                            'grouping': 'DATASET',
                                            'lifetime': lifetime}])
        new_files = []
        for file in files:
            new_file = {'scope': file['scope'],
                        'name': file['name'],
                        'state': 'A',  # available
                        'bytes': file['content_size'],
                        'adler32': file['adler32'],
                        'pfn': file['pfn']}
            new_files.append(new_file)
        self.client.add_replicas(rse=rse, files=new_files)
        # add_files_to_dataset expects a flat list of file dicts, not a nested list.
        self.client.add_files_to_dataset(scope=coll_scope, name=coll_name, files=new_files, rse=rse)
    except Exception as error:
        self.logger.error(error)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(error), traceback.format_exc()))
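# --- Illustration (not part of the plugin) ---------------------------------
# A minimal sketch of the file dicts this plugin expects. Only the keys read
# above ('scope', 'name', 'content_size', 'adler32', 'pfn') are required;
# all values here are hypothetical.
example_files = [
    {'scope': 'hpo',
     'name': 'output_0001.json',
     'content_size': 2048,
     'adler32': '0a1b2c3d',
     'pfn': 'https://storage.example.org/hpo/output_0001.json'},
]
# plugin(coll_scope='hpo', coll_name='hpo_dataset_1', account='idds_account',
#        rse='SOME_RSE', files=example_files)  # hypothetical account and RSE names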
def __call__(self, transform, input_collection, output_collection, input_contents):
    try:
        if not input_contents:
            return []
        transform_metadata = transform['transform_metadata']
        initial_points = []
        if 'initial_points' in transform_metadata:
            initial_points = transform_metadata['initial_points']

        output_contents = []
        for i, initial_point in enumerate(initial_points):
            point, idds_output = initial_point
            content_metadata = {'input_collection_id': input_collection['coll_id']}
            content = {'coll_id': output_collection['coll_id'],
                       # 'scope': output_collection['scope'],
                       'scope': 'hpo',
                       'name': str(i),
                       'min_id': 0,
                       'max_id': 0,
                       'path': json.dumps((point, idds_output)),
                       'content_type': ContentType.PseudoContent,
                       'content_metadata': content_metadata}
            if idds_output:
                content['status'] = ContentStatus.Available
            output_contents.append(content)
        return output_contents
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
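# --- Illustration (not part of the plugin) ---------------------------------
# A hedged sketch of transform_metadata['initial_points'] as consumed above:
# a list of (point, idds_output) pairs, unpacked per iteration. A non-None
# idds_output (assumed here to be the loss) marks the point as already
# evaluated, so its content starts as ContentStatus.Available. Values are
# hypothetical.
example_initial_points = [
    ({'learning_rate': 0.01, 'batch_size': 64}, None),   # still to be evaluated
    ({'learning_rate': 0.10, 'batch_size': 128}, 0.42),  # loss already known
]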
def __call__(self, processing, transform, input_collection):
    try:
        transform_metadata = transform['transform_metadata']
        if 'rule_id' in transform_metadata and transform_metadata['rule_id']:
            return transform_metadata['rule_id']

        did = {'scope': input_collection['scope'], 'name': input_collection['name']}
        try:
            rule_id = self.client.add_replication_rule(dids=[did],
                                                       copies=1,
                                                       rse_expression=transform_metadata['dest_rse'],
                                                       source_replica_expression=transform_metadata['src_rse'],
                                                       lifetime=self.lifetime,
                                                       locked=False,
                                                       grouping='DATASET',
                                                       ask_approval=False)
            return rule_id
        except DuplicateRule as ex:
            # A rule already exists; reuse the one owned by our account.
            self.logger.warn(ex)
            rules = self.client.list_did_rules(scope=input_collection['scope'],
                                               name=input_collection['name'])
            for rule in rules:
                if rule['account'] == self.client.account:
                    return rule['id']
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
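# --- Illustration (not part of the plugin) ---------------------------------
# A hedged sketch of the transform_metadata this submitter reads: 'src_rse'
# and 'dest_rse' select the Rucio source/destination expressions, and a
# non-empty 'rule_id' short-circuits resubmission. RSE names are hypothetical.
example_transform_metadata = {
    'src_rse': 'SOURCE_RSE',
    'dest_rse': 'DEST_RSE',
    'rule_id': None,  # filled in after the first successful submission
}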
def __call__(self, transform, input_collection, output_collection, input_contents):
    try:
        output_contents = []
        if input_contents:
            content_metadata = {'input_collection_id': input_collection['coll_id'],
                                'input_contents': []}
            content = {'coll_id': output_collection['coll_id'],
                       'scope': output_collection['scope'],
                       'name': 'activelearning_%s' % output_collection['coll_id'],
                       'min_id': 0,
                       'max_id': 0,
                       'status': ContentStatus.New,
                       'path': None,
                       'content_type': ContentType.PseudoContent,
                       'adler32': None,
                       'content_metadata': content_metadata}
            for input_content in input_contents:
                content_metadata['input_contents'].append(input_content['content_id'])
            output_contents.append(content)
        return output_contents
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, transform, input_collection, output_collection, input_contents):
    try:
        output_contents = []
        for input_content in input_contents:
            content_metadata = {'input_collection_id': input_collection['coll_id'],
                                'input_contents': [{'content_id': input_content['content_id'],
                                                    'min_id': input_content['min_id'],
                                                    'max_id': input_content['max_id']}]}
            content = {'coll_id': output_collection['coll_id'],
                       'scope': input_content['scope'],
                       'name': input_content['name'],
                       'min_id': input_content['min_id'],
                       'max_id': input_content['max_id'],
                       'content_type': input_content['content_type'],
                       'adler32': input_content['adler32'],
                       'content_metadata': content_metadata}
            output_contents.append(content)
        return output_contents
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, scope, name):
    try:
        did_infos = self.client.list_dids(scope, {'name': name}, type='collection',
                                          long=True, recursive=False)
        collections = []
        for did_info in did_infos:
            if did_info['did_type'] == 'DATASET':
                coll_type = CollectionType.Dataset
            elif did_info['did_type'] == 'CONTAINER':
                coll_type = CollectionType.Container
            else:
                coll_type = CollectionType.File
            collection = {'scope': did_info['scope'],
                          'name': did_info['name'],
                          'total_files': did_info['length'],
                          'bytes': did_info['bytes'],
                          'coll_type': coll_type}
            collections.append(collection)
        return collections
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def get_rucio_client(self):
    try:
        client = Client()
    except CannotAuthenticate as error:
        self.logger.error(error)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(error), traceback.format_exc()))
    return client
def __call__(self, processing, transform, input_collection, output_collection):
    try:
        contents = core_catalog.get_contents_by_coll_id_status(coll_id=input_collection['coll_id'])
        files = []
        for content in contents:
            file = '%s:%s' % (content['scope'], content['name'])
            files.append(file)
        input_list = ','.join(files)

        job_dir = self.get_job_dir(processing['processing_id'])
        input_json = 'idds_input.json'
        with open(os.path.join(job_dir, input_json), 'w') as f:
            json.dump(files, f)

        sandbox = None
        if 'sandbox' in transform['transform_metadata']:
            sandbox = transform['transform_metadata']['sandbox']
        executable = transform['transform_metadata']['executable']
        arguments = transform['transform_metadata']['arguments']
        output_json = None
        if 'output_json' in transform['transform_metadata']:
            output_json = transform['transform_metadata']['output_json']

        job_id, outputs = self.submit_job(processing['processing_id'], sandbox, executable,
                                          arguments, input_list, input_json, output_json)

        processing_metadata = processing['processing_metadata']
        processing_metadata['job_id'] = job_id
        processing_metadata['submitter'] = self.name
        if not job_id:
            processing_metadata['submit_errors'] = outputs
        else:
            processing_metadata['submit_errors'] = None

        ret = {'processing_id': processing['processing_id'],
               'status': ProcessingStatus.Submitted,
               'next_poll_at': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.poll_time_period),
               'processing_metadata': processing_metadata}
        return ret
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, scope, name):
    try:
        ret_files = []
        files = self.client.list_files(scope=scope, name=name)
        for file in files:
            ret_file = {'scope': file['scope'],
                        'name': file['name'],
                        'bytes': file['bytes'],
                        'events': file['events'],
                        'adler32': file['adler32']}
            ret_files.append(ret_file)
        return ret_files
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, rule_id):
    try:
        rule = self.client.get_replication_rule(rule_id=rule_id)
        # rule['state']
        replicases_status = {}
        if rule['locks_ok_cnt'] > 0:
            locks = self.client.list_replica_locks(rule_id=rule_id)
            for lock in locks:
                scope_name = '%s:%s' % (lock['scope'], lock['name'])
                replicases_status[scope_name] = self.get_state(lock['state'])
        return rule, replicases_status
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
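# --- Illustration (not part of the plugin) ---------------------------------
# self.get_state is not defined in this snippet. A hedged sketch of what it
# plausibly does: map Rucio replica-lock states to iDDS ContentStatus values.
# The exact mapping is an assumption, not confirmed by this code.
def get_state_sketch(lock_state):
    state_map = {'OK': ContentStatus.Available,     # transfer completed
                 'REPLICATING': ContentStatus.New,  # transfer in flight
                 'STUCK': ContentStatus.Failed}     # transfer repeatedly failing
    return state_map.get(lock_state, ContentStatus.New)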
def __call__(self, scope, name):
    try:
        did_meta = self.client.get_metadata(scope=scope, name=name)
        meta = {'bytes': did_meta['bytes'],
                'availability': did_meta['availability'],
                'events': did_meta['events'],
                'is_open': did_meta['is_open'],
                'run_number': did_meta['run_number'],
                'status': CollectionStatus.Open if did_meta['is_open'] else CollectionStatus.Closed,
                'total_files': did_meta['length']}
        return meta
    except Exception as error:
        self.logger.error(error)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(error), traceback.format_exc()))
def __call__(self, transform, input_collection, output_collection, input_contents):
    try:
        transform_metadata = transform['transform_metadata']
        initial_points = []
        if 'initial_points' in transform_metadata:
            initial_points = transform_metadata['initial_points']

        output_contents = []
        for i, initial_point in enumerate(initial_points):
            idds_output = None
            if 'IDDS_OUTPUT' in initial_point:
                idds_output = initial_point['IDDS_OUTPUT']
            content_metadata = {'input_collection_id': input_collection['coll_id'],
                                'point': initial_point}
            content = {'coll_id': output_collection['coll_id'],
                       'scope': output_collection['scope'],
                       'name': 'pseudo_' + str(i),
                       'min_id': 0,
                       'max_id': 0,
                       'content_type': ContentType.PseudoContent,
                       'content_metadata': content_metadata}
            if idds_output:
                content['status'] = ContentStatus.Available
            output_contents.append(content)
        return output_contents
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
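# --- Illustration (not part of the plugin) ---------------------------------
# A hedged sketch of transform_metadata['initial_points'] for this variant:
# here each point is a dict, and an optional 'IDDS_OUTPUT' key (assumed to
# carry a pre-computed result) marks it as already evaluated. Values are
# hypothetical.
example_initial_points = [
    {'learning_rate': 0.01, 'batch_size': 64},                        # unevaluated
    {'learning_rate': 0.10, 'batch_size': 128, 'IDDS_OUTPUT': 0.35},  # pre-evaluated
]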
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    try:
        # if 'result_parser' in transform['transform_metadata'] and transform['transform_metadata']['result_parser']
        # The exec part is already finished. No need to poll the job.
        # Here we just need to poll the results.
        if processing['status'] in [ProcessingStatus.FinishedOnExec, ProcessingStatus.FinishedOnExec.value]:
            updated_files = []
            unevaluated_points = 0
            processing_status = processing['status']
            for file in output_contents:
                if file['status'] not in [ContentStatus.Available, ContentStatus.Available.value]:
                    path = file['path']
                    point, loss = json.loads(path)
                    if loss is not None:
                        file_status = ContentStatus.Available
                        updated_file = {'content_id': file['content_id'],
                                        'status': file_status,
                                        'scope': file['scope'],
                                        'name': file['name'],
                                        'path': path,
                                        'content_metadata': file['content_metadata']}
                        updated_files.append(updated_file)
                    else:
                        unevaluated_points += 1

            if self.min_unevaluated_points and unevaluated_points >= self.min_unevaluated_points:
                # Enough points are still unevaluated; keep waiting.
                pass
            else:
                p_output_metadata = processing['output_metadata']
                if p_output_metadata:
                    processing_status = ProcessingStatus.FinishedOnStep
                else:
                    if unevaluated_points == 0:
                        processing_status = ProcessingStatus.Finished

            new_processing = None
            if processing_status == ProcessingStatus.FinishedOnStep:
                new_processing = self.create_new_processing(processing)

            processing_updates = {'status': processing_status,
                                  'substatus': processing['substatus'],
                                  'processing_metadata': processing['processing_metadata']}
            return {'updated_files': updated_files,
                    'processing_updates': processing_updates,
                    'new_processing': new_processing,
                    'new_files': []}

        processing_metadata = processing['processing_metadata']
        output_metadata = None
        if 'submitter' in processing_metadata and processing_metadata['submitter'] == self.name:
            job_id = processing_metadata['job_id']
            if job_id:
                job_status, job_err_msg = self.poll_job_status(processing['processing_id'], job_id)
            else:
                job_status = ProcessingStatus.Failed
                job_err_msg = 'job_id cannot be found in the processing metadata.'

            new_files = []
            processing_status = ProcessingStatus.Running
            processing_substatus = ProcessingStatus.Running
            if job_status in [ProcessingStatus.Finished, ProcessingStatus.Finished.value]:
                if 'output_json' in processing_metadata:
                    job_outputs, parser_errors = self.parse_job_outputs(processing['processing_id'],
                                                                        processing_metadata['output_json'])
                    if job_outputs:
                        # processing_status = ProcessingStatus.FinishedOnStep
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.Finished
                        processing_metadata['job_status'] = job_status.name
                        processing_metadata['final_errors'] = None
                        # processing_metadata['final_outputs'] = job_outputs
                        output_metadata = job_outputs
                        new_files = self.generate_new_contents(transform, input_collection,
                                                               output_collection, job_outputs)
                    elif isinstance(job_outputs, list) and len(job_outputs) == 0:
                        # An empty list is a valid result: no new points were generated.
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.Finished
                        processing_metadata['job_status'] = job_status.name
                        processing_metadata['final_errors'] = None
                        # processing_metadata['final_outputs'] = job_outputs
                        output_metadata = job_outputs
                        new_files = self.generate_new_contents(transform, input_collection,
                                                               output_collection, job_outputs)
                    else:
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.Failed
                        processing_metadata['job_status'] = job_status.name
                        err_msg = 'Failed to parse outputs: %s' % str(parser_errors)
                        processing_metadata['final_errors'] = err_msg
                else:
                    processing_status = ProcessingStatus.FinishedOnExec
                    processing_substatus = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = ('Failed to parse outputs: "output_json" file is not defined '
                               'and it is the only way currently supported to parse the results')
                    processing_metadata['final_errors'] = err_msg
            else:
                if job_status in [ProcessingStatus.Failed, ProcessingStatus.Cancel]:
                    processing_status = ProcessingStatus.FinishedOnExec
                    processing_substatus = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = 'The job failed: %s' % job_err_msg
                    processing_metadata['final_errors'] = err_msg

            updated_files = []
            for file in output_contents:
                if file['status'] not in [ContentStatus.Available, ContentStatus.Available.value]:
                    path = file['path']
                    point, loss = json.loads(path)
                    if loss is not None:
                        file_status = ContentStatus.Available
                        updated_file = {'content_id': file['content_id'],
                                        'status': file_status,
                                        'scope': file['scope'],
                                        'name': file['name'],
                                        'path': path,
                                        'content_metadata': file['content_metadata']}
                        updated_files.append(updated_file)

            processing_updates = {'status': processing_status,
                                  'substatus': processing_substatus,
                                  'processing_metadata': processing_metadata}
            if output_metadata is not None:
                processing_updates['output_metadata'] = output_metadata
            return {'updated_files': updated_files,
                    'processing_updates': processing_updates,
                    'new_processing': None,
                    'new_files': new_files}
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
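# --- Illustration (not part of the plugin) ---------------------------------
# A runnable sketch of the content 'path' payload parsed by the pollers
# above: a JSON-encoded (point, loss) pair. The loss stays None until the
# evaluation report arrives; point values are hypothetical.
example_path = json.dumps(({'learning_rate': 0.01, 'batch_size': 64}, 0.318))
example_point, example_loss = json.loads(example_path)
assert example_loss is not None  # such a content would be marked ContentStatus.Available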
def __call__(self, processing, transform, input_collection, output_collection):
    try:
        contents = core_catalog.get_contents_by_coll_id_status(coll_id=output_collection['coll_id'])
        points = []
        unevaluated_points = 0
        for content in contents:
            point = content['content_metadata']['point']
            points.append(point)
            if content['status'] != ContentStatus.Available:
                unevaluated_points += 1

        if unevaluated_points >= self.min_unevaluated_points:
            # Do not submit the job while enough points are still unevaluated.
            processing_metadata = processing['processing_metadata']
            processing_metadata['unevaluated_points'] = unevaluated_points
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.New,
                   'processing_metadata': processing_metadata}
            return ret

        job_dir = self.get_job_dir(processing['processing_id'])
        input_json = 'idds_input.json'
        with open(os.path.join(job_dir, input_json), 'w') as f:
            json.dump(points, f)

        sandbox = None
        if 'sandbox' in transform['transform_metadata']:
            sandbox = transform['transform_metadata']['sandbox']
        executable = transform['transform_metadata']['executable']
        arguments = transform['transform_metadata']['arguments']
        output_json = None
        if 'output_json' in transform['transform_metadata']:
            output_json = transform['transform_metadata']['output_json']

        param_values = {'NUM_POINTS': self.max_unevaluated_points - unevaluated_points,
                        'IN': input_json,  # the actual input file name, not the literal string 'input_json'
                        'OUT': output_json}
        executable = replace_parameters_with_values(executable, param_values)
        arguments = replace_parameters_with_values(arguments, param_values)

        input_list = None
        job_id, outputs = self.submit_job(processing['processing_id'], sandbox, executable,
                                          arguments, input_list, input_json, output_json)

        processing_metadata = processing['processing_metadata']
        processing_metadata['job_id'] = job_id
        processing_metadata['submitter'] = self.name
        if not job_id:
            processing_metadata['submit_errors'] = outputs
        else:
            processing_metadata['submit_errors'] = None

        ret = {'processing_id': processing['processing_id'],
               'status': ProcessingStatus.Submitted,
               'processing_metadata': processing_metadata}
        return ret
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
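# --- Illustration (not part of the plugin) ---------------------------------
# replace_parameters_with_values comes from the iDDS utilities and is not
# shown here. A hedged sketch of the substitution it performs, assuming
# %KEY-style tokens in the command line; the token syntax is an assumption.
def replace_parameters_with_values_sketch(text, values):
    for key, value in values.items():
        text = text.replace('%' + key, str(value))
    return text

# e.g. 'run.sh --n %NUM_POINTS --in %IN --out %OUT' with
# {'NUM_POINTS': 10, 'IN': 'idds_input.json', 'OUT': 'output.json'}
# would become 'run.sh --n 10 --in idds_input.json --out output.json'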
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    raise exceptions.AgentPluginError('NotImplemented')
def __call__(self, processing, transform, input_collection, output_collection):
    try:
        contents = core_catalog.get_contents_by_coll_id_status(coll_id=output_collection['coll_id'])
        points = []
        unevaluated_points = 0
        for content in contents:
            # point = content['content_metadata']['point']
            point = json.loads(content['path'])
            points.append(point)
            if content['status'] != ContentStatus.Available:
                unevaluated_points += 1

        job_dir = self.get_job_dir(processing['processing_id'])
        input_json = 'idds_input.json'
        opt_space = None
        opt_points = {'points': points}
        if 'opt_space' in transform['transform_metadata']:
            opt_space = transform['transform_metadata']['opt_space']
            opt_points['opt_space'] = opt_space
        else:
            opt_points['opt_space'] = None
        with open(os.path.join(job_dir, input_json), 'w') as f:
            json.dump(opt_points, f)

        if 'method' in transform['transform_metadata'] and transform['transform_metadata']['method']:
            status, errors, sandbox, executable, arguments, input_json, output_json, should_transfer_executable = \
                self.get_executable_arguments_for_method(transform['transform_metadata'], input_json, unevaluated_points)
        else:
            status, errors, sandbox, executable, arguments, input_json, output_json, should_transfer_executable = \
                self.get_executable_arguments_for_sandbox(transform['transform_metadata'], input_json, unevaluated_points)

        if status != 0:
            processing_metadata = processing['processing_metadata']
            processing_metadata['job_id'] = None
            processing_metadata['submitter'] = self.name
            processing_metadata['submit_errors'] = errors
            processing_metadata['output_json'] = output_json
            processing_metadata['max_points'] = self.get_max_points(transform['transform_metadata'])
            # processing_metadata['job_dir'] = job_dir
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted,
                   'processing_metadata': processing_metadata}
        else:
            input_list = None
            job_id, outputs = self.submit_job(processing['processing_id'], sandbox, executable,
                                              arguments, input_list, input_json, output_json,
                                              should_transfer_executable)
            processing_metadata = processing['processing_metadata']
            processing_metadata['job_id'] = job_id
            processing_metadata['submitter'] = self.name
            processing_metadata['output_json'] = output_json
            processing_metadata['max_points'] = self.get_max_points(transform['transform_metadata'])
            # processing_metadata['job_dir'] = job_dir
            if not job_id:
                processing_metadata['submit_errors'] = outputs
            else:
                processing_metadata['submit_errors'] = None
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted,
                   'next_poll_at': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.poll_time_period),
                   'processing_metadata': processing_metadata}
        return ret
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    try:
        # if 'result_parser' in transform['transform_metadata'] and transform['transform_metadata']['result_parser']
        processing_metadata = processing['processing_metadata']
        output_metadata = None
        if 'submitter' in processing_metadata and processing_metadata['submitter'] == self.name:
            job_id = processing_metadata['job_id']
            if job_id:
                job_status, job_err_msg = self.poll_job_status(processing['processing_id'], job_id)
            else:
                job_status = ProcessingStatus.Failed
                job_err_msg = 'job_id cannot be found in the processing metadata.'

            processing_status = ProcessingStatus.Running
            if job_status in [ProcessingStatus.Finished, ProcessingStatus.Finished.value]:
                if 'output_json' in transform['transform_metadata']:
                    job_outputs, parser_errors = self.parse_job_outputs(processing['processing_id'],
                                                                        transform['transform_metadata']['output_json'])
                    if job_outputs:
                        processing_status = ProcessingStatus.Finished
                        processing_metadata['job_status'] = job_status.name
                        processing_metadata['final_errors'] = None
                        # processing_metadata['final_outputs'] = job_outputs
                        output_metadata = job_outputs
                    else:
                        processing_status = ProcessingStatus.Failed
                        processing_metadata['job_status'] = job_status.name
                        err_msg = 'Failed to parse outputs: %s' % str(parser_errors)
                        processing_metadata['final_errors'] = err_msg
                else:
                    processing_status = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = ('Failed to parse outputs: "output_json" file is not defined '
                               'and it is the only way currently supported to parse the results')
                    processing_metadata['final_errors'] = err_msg
            else:
                if job_status in [ProcessingStatus.Failed, ProcessingStatus.Cancel]:
                    processing_status = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = 'The job failed: %s' % job_err_msg
                    processing_metadata['final_errors'] = err_msg

            updated_files = []
            if processing_status in [ProcessingStatus.Finished, ProcessingStatus.Failed]:
                if processing_status == ProcessingStatus.Finished:
                    file_status = ContentStatus.Available
                    # content_metadata = processing_metadata['final_outputs']
                    content_metadata = output_metadata
                if processing_status == ProcessingStatus.Failed:
                    file_status = ContentStatus.Failed
                    content_metadata = None
                for file in output_contents:
                    updated_file = {'content_id': file['content_id'],
                                    'status': file_status,
                                    'scope': file['scope'],
                                    'name': file['name'],
                                    'path': None,
                                    'content_metadata': content_metadata}
                    updated_files.append(updated_file)

            processing_updates = {'status': processing_status,
                                  'processing_metadata': processing_metadata}
            if output_metadata:
                processing_updates['output_metadata'] = output_metadata
            return {'updated_files': updated_files,
                    'processing_updates': processing_updates}
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    try:
        # if 'result_parser' in transform['transform_metadata'] and transform['transform_metadata']['result_parser']
        # The exec part is already finished. No need to poll the job.
        # Here we just need to poll the results.
        if processing['status'] in [ProcessingStatus.FinishedOnExec, ProcessingStatus.FinishedOnExec.value]:
            updated_files = []
            unevaluated_points = 0
            processing_status = processing['status']
            processing_substatus = processing['substatus']
            for file in output_contents:
                if file['status'] not in [ContentStatus.Available, ContentStatus.Available.value]:
                    path = file['path']
                    point, loss = json.loads(path)
                    if loss is not None:
                        file_status = ContentStatus.Available
                        updated_file = {'content_id': file['content_id'],
                                        'status': file_status,
                                        'scope': file['scope'],
                                        'name': file['name'],
                                        'path': path,
                                        'content_metadata': file['content_metadata']}
                        updated_files.append(updated_file)
                    else:
                        unevaluated_points += 1

            if unevaluated_points == 0:
                if processing_substatus in [ProcessingStatus.FinishedTerm, ProcessingStatus.FinishedTerm.value]:
                    processing_status = ProcessingStatus.Finished
                elif processing_substatus in [ProcessingStatus.Failed, ProcessingStatus.Failed.value]:
                    processing_status = ProcessingStatus.Failed
                elif processing_substatus in [ProcessingStatus.Timeout, ProcessingStatus.Timeout.value]:
                    processing_status = ProcessingStatus.Timeout
                else:
                    # check whether max_points is reached
                    max_points = self.get_max_points(processing)
                    if output_contents is None:
                        output_contents = []
                    if max_points and len(output_contents) >= max_points:
                        processing_status = ProcessingStatus.Finished
                    else:
                        processing_status = ProcessingStatus.FinishedOnStep
            else:
                processing_metadata = processing['processing_metadata']
                # fail the processing if it has waited too long
                current_time = datetime.datetime.utcnow()
                last_touch_time = self.get_last_touch_time(processing, output_contents, updated_files)
                life_diff = current_time - last_touch_time
                life_time = life_diff.total_seconds()
                if life_time > self.max_life_time:
                    processing_status = ProcessingStatus.Timeout
                    timeout_msg = "Timeout (%s seconds) waiting for evaluation reports" % self.max_life_time
                    if processing_metadata.get('final_errors'):
                        processing_metadata['final_errors'] = timeout_msg + processing_metadata['final_errors']
                    else:
                        processing_metadata['final_errors'] = timeout_msg
                else:
                    if self.min_unevaluated_points and unevaluated_points >= self.min_unevaluated_points:
                        pass
                    else:
                        # check whether max_points is reached
                        max_points = self.get_max_points(processing)
                        if output_contents is None:
                            output_contents = []
                        if max_points and len(output_contents) >= max_points:
                            pass
                        else:
                            processing_status = ProcessingStatus.FinishedOnStep

            new_processing = None
            if processing_status == ProcessingStatus.FinishedOnStep:
                new_processing = self.create_new_processing(processing)

            processing_updates = {'status': processing_status,
                                  'substatus': processing['substatus'],
                                  'next_poll_at': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.poll_time_period),
                                  'processing_metadata': processing['processing_metadata']}
            return {'updated_files': updated_files,
                    'processing_updates': processing_updates,
                    'new_processing': new_processing,
                    'new_files': []}

        processing_metadata = processing['processing_metadata']
        output_metadata = None
        if 'submitter' in processing_metadata and processing_metadata['submitter'] == self.name:
            job_id = processing_metadata['job_id']
            if job_id:
                job_status, job_err_msg, std_out_msg, std_err_msg = self.poll_job_status(processing['processing_id'], job_id)
            else:
                job_status = ProcessingStatus.Failed
                job_err_msg = 'job_id cannot be found in the processing metadata.'
                std_out_msg = None
                std_err_msg = None

            # keep only the tail of stdout/stderr to bound the metadata size
            if std_out_msg:
                std_out_msg = std_out_msg[-2000:]
            if std_err_msg:
                std_err_msg = std_err_msg[-2000:]

            new_files = []
            processing_status = ProcessingStatus.Running
            processing_substatus = ProcessingStatus.Running
            if job_status in [ProcessingStatus.Finished, ProcessingStatus.Finished.value]:
                if 'output_json' in processing_metadata:
                    job_outputs, parser_errors = self.parse_job_outputs(processing['processing_id'],
                                                                        processing_metadata['output_json'])
                    if job_outputs:
                        # processing_status = ProcessingStatus.FinishedOnStep
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.Finished
                        processing_metadata['job_status'] = job_status.name
                        processing_metadata['final_errors'] = None
                        # processing_metadata['final_outputs'] = job_outputs
                        output_metadata = job_outputs
                        new_files = self.generate_new_contents(transform, input_collection,
                                                               output_collection, job_outputs)
                    elif isinstance(job_outputs, list) and len(job_outputs) == 0:
                        # An empty list means the optimizer terminated without new points.
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.FinishedTerm
                        processing_metadata['job_status'] = job_status.name
                        processing_metadata['final_errors'] = ("No new hyperparameters are created."
                                                               + " stderr: (%s), stdout: (%s)" % (std_err_msg, std_out_msg))
                        # processing_metadata['final_outputs'] = job_outputs
                        output_metadata = job_outputs
                        new_files = self.generate_new_contents(transform, input_collection,
                                                               output_collection, job_outputs)
                    else:
                        processing_status = ProcessingStatus.FinishedOnExec
                        processing_substatus = ProcessingStatus.Failed
                        processing_metadata['job_status'] = job_status.name
                        err_msg = 'Failed to parse outputs: %s' % str(parser_errors)
                        processing_metadata['final_errors'] = err_msg + " stderr: (%s), stdout: (%s)" % (std_err_msg, std_out_msg)
                else:
                    processing_status = ProcessingStatus.FinishedOnExec
                    processing_substatus = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = ('Failed to parse outputs: "output_json" file is not defined '
                               'and it is the only way currently supported to parse the results')
                    processing_metadata['final_errors'] = err_msg + " stderr: (%s), stdout: (%s)" % (std_err_msg, std_out_msg)
            else:
                if job_status in [ProcessingStatus.Failed, ProcessingStatus.Cancel]:
                    processing_status = ProcessingStatus.FinishedOnExec
                    processing_substatus = ProcessingStatus.Failed
                    processing_metadata['job_status'] = job_status.name
                    err_msg = 'The job failed: %s' % job_err_msg
                    processing_metadata['final_errors'] = err_msg + " stderr: (%s), stdout: (%s)" % (std_err_msg, std_out_msg)

            if processing_status == ProcessingStatus.FinishedOnExec:
                # archive the job logs once the exec part is done
                job_dir = self.get_job_dir(processing['processing_id'])
                tar_file_name = self.tar_job_logs(job_dir)
                processing_metadata['job_logs_tar'] = tar_file_name

            updated_files = []
            for file in output_contents:
                if file['status'] not in [ContentStatus.Available, ContentStatus.Available.value]:
                    path = file['path']
                    point, loss = json.loads(path)
                    if loss is not None:
                        file_status = ContentStatus.Available
                        updated_file = {'content_id': file['content_id'],
                                        'status': file_status,
                                        'scope': file['scope'],
                                        'name': file['name'],
                                        'path': path,
                                        'content_metadata': file['content_metadata']}
                        updated_files.append(updated_file)

            processing_updates = {'status': processing_status,
                                  'substatus': processing_substatus,
                                  'next_poll_at': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.poll_time_period),
                                  'processing_metadata': processing_metadata}
            if output_metadata is not None:
                processing_updates['output_metadata'] = output_metadata

            # cap new_files so the total number of points does not exceed max_points
            max_points = self.get_max_points(processing)
            if output_contents is None:
                output_contents = []
            if max_points and new_files and len(output_contents) + len(new_files) >= max_points:
                left_points = max_points - len(output_contents)
                if left_points <= 0:
                    left_points = 0
                new_files = new_files[:left_points]

            return {'updated_files': updated_files,
                    'processing_updates': processing_updates,
                    'new_processing': None,
                    'new_files': new_files}
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, processing, transform, input_collection, output_collection):
    try:
        contents = core_catalog.get_contents_by_coll_id_status(coll_id=output_collection['coll_id'])
        points = []
        unevaluated_points = 0
        for content in contents:
            # point = content['content_metadata']['point']
            point = json.loads(content['path'])
            points.append(point)
            if content['status'] != ContentStatus.Available:
                unevaluated_points += 1

        """
        if self.min_unevaluated_points and unevaluated_points >= self.min_unevaluated_points:
            # do not submit the job
            processing_metadata = processing['processing_metadata']
            processing_metadata['unevaluated_points'] = unevaluated_points
            processing_metadata['not_submit'] = 'unevaluated_points(%s) > min_unevaluated_points(%s)' % (unevaluated_points, self.min_unevaluated_points)
            self.logger.info("processing_id(%s) not submitted currently because unevaluated_points(%s) >= min_unevaluated_points(%s)" % (processing['processing_id'], unevaluated_points, self.min_unevaluated_points))
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.New,
                   'processing_metadata': processing_metadata}
            return ret
        if 'not_submit' in processing_metadata:
            del processing_metadata['not_submit']
        """

        job_dir = self.get_job_dir(processing['processing_id'])
        input_json = 'idds_input.json'
        opt_space = None
        opt_points = {'points': points}
        if 'opt_space' in transform['transform_metadata']:
            opt_space = transform['transform_metadata']['opt_space']
            opt_points['opt_space'] = opt_space
        else:
            opt_points['opt_space'] = None
        with open(os.path.join(job_dir, input_json), 'w') as f:
            json.dump(opt_points, f)

        if 'method' in transform['transform_metadata'] and transform['transform_metadata']['method']:
            status, errors, sandbox, executable, arguments, input_json, output_json, should_transfer_executable = \
                self.get_executable_arguments_for_method(transform['transform_metadata'], input_json, unevaluated_points)
        else:
            status, errors, sandbox, executable, arguments, input_json, output_json, should_transfer_executable = \
                self.get_executable_arguments_for_sandbox(transform['transform_metadata'], input_json, unevaluated_points)

        if status != 0:
            processing_metadata = processing['processing_metadata']
            processing_metadata['job_id'] = None
            processing_metadata['submitter'] = self.name
            processing_metadata['submit_errors'] = errors
            processing_metadata['output_json'] = output_json
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted,
                   'processing_metadata': processing_metadata}
        else:
            input_list = None
            job_id, outputs = self.submit_job(processing['processing_id'], sandbox, executable,
                                              arguments, input_list, input_json, output_json,
                                              should_transfer_executable)
            processing_metadata = processing['processing_metadata']
            processing_metadata['job_id'] = job_id
            processing_metadata['submitter'] = self.name
            processing_metadata['output_json'] = output_json
            if not job_id:
                processing_metadata['submit_errors'] = outputs
            else:
                processing_metadata['submit_errors'] = None
            ret = {'processing_id': processing['processing_id'],
                   'status': ProcessingStatus.Submitted,
                   'processing_metadata': processing_metadata}
        return ret
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))
def __call__(self, processing, transform, input_collection, output_collection, output_contents):
    try:
        processing_metadata = processing['processing_metadata']
        all_rule_notfound = True

        if 'rule_poller' not in self.plugins:
            raise exceptions.AgentPluginError('Plugin rule_poller is required')

        rule_id = processing_metadata['rule_id']
        try:
            basic_rule, basic_replicases_status = self.plugins['rule_poller'](rule_id)
            all_rule_notfound = False
        except exceptions.ProcessNotFound as ex:
            self.logger.warn(ex)
            basic_rule = None
            basic_replicases_status = []

        if 'new_rule_ids' in processing_metadata:
            new_rule_ids = processing_metadata['new_rule_ids']
        else:
            new_rule_ids = []
        new_rules, new_replicases_statuses = [], []
        for rule_id in new_rule_ids:
            try:
                new_rule, new_replicases_status = self.plugins['rule_poller'](rule_id)
                all_rule_notfound = False
            except exceptions.ProcessNotFound as ex:
                self.logger.warn(ex)
                new_rule, new_replicases_status = None, []
            new_rules.append(new_rule)
            new_replicases_statuses.append(new_replicases_status)

        remain_files = []
        updated_files = []
        processing_updates = {}

        file_status_statistics = {}
        for file in output_contents:
            file_key = '%s:%s' % (file['scope'], file['name'])
            new_file_status = self.get_replica_status(file_key, basic_replicases_status, new_replicases_statuses)
            if new_file_status != file['status']:
                file['status'] = new_file_status
                updated_file = {'content_id': file['content_id'],
                                'status': new_file_status,
                                'scope': file['scope'],
                                'name': file['name'],
                                'path': None}
                updated_files.append(updated_file)
            if file['status'] in [ContentStatus.New]:
                remain_file = {'scope': file['scope'], 'name': file['name']}
                remain_files.append(remain_file)

            if file['status'] not in file_status_statistics:
                file_status_statistics[file['status']] = 0
            file_status_statistics[file['status']] += 1

        # Default to Running; promote only when all files agree on one terminal status.
        processing_status = ProcessingStatus.Running
        file_status_keys = list(file_status_statistics.keys())
        if len(file_status_keys) == 1:
            if file_status_keys == [ContentStatus.Available]:
                processing_status = ProcessingStatus.Finished
            elif file_status_keys == [ContentStatus.Failed]:
                processing_status = ProcessingStatus.Failed

        file_statusvalue_statistics = {}
        for key in file_status_statistics:
            file_statusvalue_statistics[key.name] = file_status_statistics[key]
        processing_metadata['content_status_statistics'] = file_statusvalue_statistics

        new_rule_id = None
        # self.logger.info("number of remaining files: %s" % len(remain_files))
        if remain_files and self.should_create_new_rule(basic_rule, new_rules, transform):
            self.logger.info("creating new rules")
            new_rule_id = self.create_new_rule(rule=basic_rule, dids=remain_files,
                                               dest_rse=basic_rule['rse_expression'])
            self.logger.info("For transform(%s), new rule id: %s" % (transform['transform_id'], new_rule_id))
        if new_rule_id is not None:
            if 'new_rule_ids' not in processing_metadata:
                processing_metadata['new_rule_ids'] = [new_rule_id]
            else:
                processing_metadata['new_rule_ids'].append(new_rule_id)

        if all_rule_notfound and new_rule_id is None:
            processing_status = ProcessingStatus.Failed

        processing_updates = {'status': processing_status,
                              'next_poll_at': datetime.datetime.utcnow() + datetime.timedelta(seconds=self.poll_time_period),
                              'processing_metadata': processing_metadata}
        return {'updated_files': updated_files,
                'processing_updates': processing_updates}
    except Exception as ex:
        self.logger.error(ex)
        self.logger.error(traceback.format_exc())
        raise exceptions.AgentPluginError('%s: %s' % (str(ex), traceback.format_exc()))