def _get_gc_block_list(self, use_phedex):
	dataset_name_list = self.get_dataset_name_list()
	progress_ds = ProgressActivity('Getting dataset', len(dataset_name_list))
	for dataset_idx, dataset_path in enumerate(dataset_name_list):
		progress_ds.update_progress(dataset_idx, msg='Getting dataset %s' % dataset_path)
		counter = 0
		blockinfo_list = list(self._filter_cms_blockinfo_list(dataset_path, not use_phedex))
		progress_block = ProgressActivity('Getting block information', len(blockinfo_list))
		for (block_path, replica_infos) in blockinfo_list:
			result = {}
			result[DataProvider.Dataset] = block_path.split('#')[0]
			result[DataProvider.BlockName] = block_path.split('#')[1]
			progress_block.update_progress(counter,
				msg='Getting block information for ' + result[DataProvider.BlockName])
			if use_phedex:  # Start parallel phedex query
				replicas_dict = {}
				phedex_thread = start_thread('Query phedex site info for %s' % block_path,
					self._get_phedex_replica_list, block_path, replicas_dict)
				self._fill_cms_fi_list(result, block_path)
				phedex_thread.join()
				replica_infos = replicas_dict.get(block_path)
			else:
				self._fill_cms_fi_list(result, block_path)
			result[DataProvider.Locations] = self._process_replica_list(block_path, replica_infos)
			if len(result[DataProvider.FileList]):
				counter += 1
				yield result
		progress_block.finish()
		if counter == 0:
			raise DatasetError('Dataset %s does not contain any valid blocks!' % dataset_path)
	progress_ds.finish()
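# The block above overlaps the PhEDEx replica lookup with building the file list.
# A minimal standalone sketch of that pattern, assuming threading.Thread as a
# stand-in for grid-control's start_thread helper; query_replicas and
# fill_file_list below are hypothetical placeholders, not part of the original code.
import threading

def query_replicas(block_path, replicas_dict):
	# placeholder for the remote PhEDEx call; stores its result under block_path
	replicas_dict[block_path] = ['T2_EXAMPLE_SITE']

def fill_file_list(result, block_path):
	# placeholder for the file listing that runs while the query is in flight
	result['files'] = ['%s/file_%d.root' % (block_path, idx) for idx in range(3)]

def get_block_with_replicas(block_path):
	result = {}
	replicas_dict = {}
	worker = threading.Thread(target=query_replicas, args=(block_path, replicas_dict))
	worker.start()                      # remote query runs in the background ...
	fill_file_list(result, block_path)  # ... while the file list is built locally
	worker.join()                       # wait for the replica information
	result['locations'] = replicas_dict.get(block_path)
	return result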
def iter_jobnum_output_dn(output_dn, jobnum_list):
	if jobnum_list:
		jobnum_list.sort()
		progress = ProgressActivity('Processing output directory', jobnum_list[-1])
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			yield (jobnum, os.path.join(output_dn, 'job_%d' % jobnum))
		progress.finish()
def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
	metadata_dict['GC_SOURCE_DIR'] = self._path
	progress = ProgressActivity('Reading source directory')
	for counter, size_url in enumerate(self._iter_path(self._path)):
		progress.update_progress(counter)
		metadata_dict['FILE_SIZE'] = size_url[0]
		url = size_url[1]
		if self._trim:
			url = url.replace('file://', '')
		yield (url, metadata_dict, entries, location_list, obj_dict)
	progress.finish()
def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
	progress_max = None
	if self._selected:
		progress_max = self._selected[-1] + 1
	progress = ProgressActivity('Reading job logs', progress_max)
	for jobnum in self._selected:
		progress.update_progress(jobnum)
		metadata_dict['GC_JOBNUM'] = jobnum
		obj_dict.update({'GC_TASK': self._ext_task, 'GC_WORKDIR': self._ext_work_dn})
		job_output_dn = os.path.join(self._ext_work_dn, 'output', 'job_%d' % jobnum)
		yield (job_output_dn, metadata_dict, entries, location_list, obj_dict)
	progress.finish()
def download_job(opts, work_dn, status_mon, job_db, job_obj, jobnum):
	if job_obj.get('download') == 'True' and not opts.mark_ignore_dl:
		return status_mon.register_job_result(jobnum, 'All files already downloaded',
			JobDownloadStatus.JOB_ALREADY)

	# Read the file hash entries from job info file
	fi_list = FileInfoProcessor().process(os.path.join(work_dn, 'output', 'job_%d' % jobnum)) or []
	is_download_failed = False
	if not fi_list:
		if opts.mark_empty_fail:
			is_download_failed = True
		else:
			return status_mon.register_job_result(jobnum, 'Job has no output files',
				JobDownloadStatus.JOB_NO_OUTPUT)

	download_result_list = []
	progress = ProgressActivity('Processing output files', len(fi_list))
	for (fi_idx, fi) in enumerate(fi_list):
		progress.update_progress(fi_idx, msg='Processing output file %r' % fi[FileInfo.NameDest])
		download_result_list.append(download_single_file(opts, jobnum, fi_idx, fi, status_mon))
	progress.finish()

	is_download_failed = is_download_failed or any(imap(download_result_list.__contains__, [
		FileDownloadStatus.FILE_TIMEOUT, FileDownloadStatus.FILE_HASH_FAILED,
		FileDownloadStatus.FILE_TRANSFER_FAILED, FileDownloadStatus.FILE_MKDIR_FAILED]))
	is_download_success = all(imap(
		[FileDownloadStatus.FILE_OK, FileDownloadStatus.FILE_EXISTS].__contains__, download_result_list))

	# Ignore the first opts.retry number of failed jobs
	retry_count = int(job_obj.get('download attempt', 0))
	if fi_list and is_download_failed and opts.retry and (retry_count < int(opts.retry)):
		set_job_prop(job_db, jobnum, job_obj, 'download attempt', str(retry_count + 1))
		# note: parentheses are required here - '%d' % retry_count + 1 would format first and then fail
		return status_mon.register_job_result(jobnum,
			'Download attempt #%d failed' % (retry_count + 1), JobDownloadStatus.RETRY)

	delete_files(opts, jobnum, fi_list, is_download_failed)

	if is_download_failed:
		if opts.mark_fail:
			# Mark job as failed to trigger resubmission
			job_obj.state = Job.FAILED
			job_db.commit(jobnum, job_obj)
		status_mon.register_job_result(jobnum, 'Download failed', JobDownloadStatus.JOB_FAILED)
	elif is_download_success:
		if opts.mark_dl:
			# Mark as downloaded
			set_job_prop(job_db, jobnum, job_obj, 'download', 'True')
		status_mon.register_job_result(jobnum, 'Download successful', JobDownloadStatus.JOB_OK)
	else:  # eg. because of SE blacklist
		status_mon.register_job_result(jobnum, 'Download incomplete',
			JobDownloadStatus.JOB_INCOMPLETE)
def _iter_datasource_items(self, item, metadata_dict, entries, location_list, obj_dict):
	dn_list = lfilter(lambda fn: fn.startswith('job_'), os.listdir(self._ext_output_dir))
	progress = ProgressActivity('Reading job logs', len(dn_list))
	for idx, dn in enumerate(dn_list):
		progress.update_progress(idx)
		try:
			metadata_dict['GC_JOBNUM'] = int(dn.split('_')[1])
		except Exception:
			clear_current_exception()
			continue
		obj_dict['GC_WORKDIR'] = self._ext_work_dn
		if self._selector and not self._selector(metadata_dict['GC_JOBNUM'], None):
			continue
		job_output_dn = os.path.join(self._ext_output_dir, dn)
		yield (job_output_dn, metadata_dict, entries, location_list, obj_dict)
	progress.finish()
def write(cls, fn, psrc_len, psrc_metadata, psp_iter):
	# write parameter part of parameter adapter
	fp = GZipTextFile(fn, 'w')
	try:
		vn_list = sorted(lmap(lambda p: p.value, ifilter(lambda p: not p.untracked, psrc_metadata)))
		fp.write('# %s\n' % json.dumps(vn_list))
		progress = ProgressActivity('Writing parameter dump', progress_max=psrc_len)
		for jobnum, psp in enumerate(psp_iter):
			progress.update_progress(jobnum)
			psp_str = str.join('\t', imap(lambda k: json.dumps(psp.get(k, '')), vn_list))
			if psp.get(ParameterInfo.ACTIVE, True):
				fp.write('%d\t%s\n' % (jobnum, psp_str))
			else:
				fp.write('%d!\t%s\n' % (jobnum, psp_str))
		progress.finish()
	finally:
		fp.close()
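# Illustration of the dump format written by write() above, using only the standard
# library; vn_list and psp are hypothetical example values, and the '!' marker for
# inactive parameter points (ParameterInfo.ACTIVE in the original) is noted in a comment.
import json

vn_list = ['SEED', 'SAMPLE']                    # tracked parameter names (example)
psp = {'SEED': 1234, 'SAMPLE': 'ttbar'}         # one parameter space point (example)
header = '# %s' % json.dumps(vn_list)           # -> '# ["SEED", "SAMPLE"]'
psp_str = str.join('\t', [json.dumps(psp.get(vn, '')) for vn in vn_list])
line = '%d\t%s' % (0, psp_str)                  # -> '0\t1234\t"ttbar"'
# an inactive parameter point would be written as '0!\t...' instead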
def _get_gc_block_list(self, use_phedex):
	dataset_name_list = self.get_dataset_name_list()
	progress_ds = ProgressActivity('Getting dataset', len(dataset_name_list))
	for dataset_idx, dataset_path in enumerate(dataset_name_list):
		progress_ds.update_progress(dataset_idx, msg='Getting dataset %s' % dataset_path)
		counter = 0
		blockinfo_list = list(self._filter_cms_blockinfo_list(dataset_path, not use_phedex))
		progress_block = ProgressActivity('Getting block information', len(blockinfo_list))
		for (block_path, replica_infos) in blockinfo_list:
			result = {}
			result[DataProvider.Dataset] = block_path.split('#')[0]
			result[DataProvider.BlockName] = block_path.split('#')[1]
			progress_block.update_progress(counter,
				msg='Getting block information for ' + result[DataProvider.BlockName])
			if use_phedex and self._allow_phedex:  # Start parallel phedex query
				replicas_dict = {}
				phedex_thread = start_thread('Query phedex site info for %s' % block_path,
					self._get_phedex_replica_list, block_path, replicas_dict)
				self._fill_cms_fi_list(result, block_path)
				phedex_thread.join()
				replica_infos = replicas_dict.get(block_path)
			else:
				self._fill_cms_fi_list(result, block_path)
			result[DataProvider.Locations] = self._process_replica_list(block_path, replica_infos)
			if len(result[DataProvider.FileList]):
				counter += 1
				yield result
		progress_block.finish()
		if counter == 0:
			raise DatasetError('Dataset %s does not contain any valid blocks!' % dataset_path)
	progress_ds.finish()
def process_all(opts, args):
	# Init everything in each loop to pick up changes
	script_obj = get_script_object(args[0], opts.job_selector, only_success=False)
	token = AccessToken.create_instance(opts.token, script_obj.new_config, 'token')
	work_dn = script_obj.config.get_work_path()
	if process_all.first:
		logging.getLogger().addHandler(ProcessArchiveHandler(os.path.join(work_dn, 'error.tar')))
		process_all.first = False

	# Create SE output dir
	if not opts.output:
		opts.output = os.path.join(work_dn, 'se_output')
	if '://' not in opts.output:
		opts.output = 'file:///%s' % os.path.abspath(opts.output)

	job_db = script_obj.job_db
	jobnum_list = job_db.get_job_list()
	status_mon = StatusMonitor(len(jobnum_list))
	if opts.shuffle:
		random.shuffle(jobnum_list)
	else:
		jobnum_list.sort()

	if opts.threads:
		activity = Activity('Processing jobs')
		pool = GCThreadPool(opts.threads)
		for jobnum in jobnum_list:
			pool.start_daemon('Processing job %d' % jobnum, process_job,
				opts, work_dn, status_mon, job_db, token, jobnum)
		pool.wait_and_drop()
		activity.finish()
	else:
		progress = ProgressActivity('Processing job', max(jobnum_list) + 1)
		for jobnum in jobnum_list:
			progress.update_progress(jobnum)
			process_job(opts, work_dn, status_mon, job_db, token, jobnum)
		progress.finish()

	# Print overview
	if not opts.hide_results:
		status_mon.show_results()
	return status_mon.is_finished()
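# A minimal sketch of the fan-out used in the opts.threads branch above, with the
# standard library's ThreadPoolExecutor standing in for grid-control's GCThreadPool;
# process_job and its argument list mirror the call made above but are assumptions here.
from concurrent.futures import ThreadPoolExecutor

def run_threaded(jobnum_list, process_job, opts, work_dn, status_mon, job_db, token, n_threads=4):
	with ThreadPoolExecutor(max_workers=n_threads) as pool:
		future_list = [pool.submit(process_job, opts, work_dn, status_mon, job_db, token, jobnum)
			for jobnum in jobnum_list]
		for future in future_list:
			future.result()  # propagate exceptions raised in the worker threads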