def verify(conf, type='data', report_file=None):
    """
    HDF file structure verifier.

    Parameters
    ----------
    conf : str
        configuration file name, including path

    type : str
        a string characterizing the data type (i.e. data_dark, data_white or data),
        defaulted to 'data'

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    logger, limits, quality_checks, feedback, feedback_pv, report_type = init(conf)
    aggregateq = Queue()
    args = limits[type], aggregateq, quality_checks, feedback, feedback_pv
    feed.feed_data(conf, logger, *args)

    bad_indexes = {}
    aggregate = aggregateq.get()

    if report_file is not None:
        report.report_results(logger, aggregate, type, None, report_file, report_type)
    report.add_bad_indexes(aggregate, type, bad_indexes)

    return bad_indexes
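
# Example usage (a minimal sketch, not part of the verifier API): the configuration
# path and report destination below are hypothetical, and the example assumes, per the
# docstring, that report_file accepts an open file object.
def _example_verify_structure():
    with open('/local/user/data_white.report', 'w') as report_file:
        bad_indexes = verify('/local/user/dqconfig.ini', type='data_white',
                             report_file=report_file)
    # bad_indexes maps the verified data type to the indexes of frames that failed checks
    return bad_indexes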
def verify(self, config, report_file=None, sequence=None):
    """
    This function starts the real-time verification process according to the given
    configuration.

    This function reads the configuration and initiates variables accordingly.
    It creates a Feed instance, starts the data feed, and waits to receive results in
    aggregateq. The results are then written into a report file.

    Parameters
    ----------
    config : str
        configuration file name, including path

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    sequence : list or int
        information about data sequence or number of frames

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    def get_decor(qc):
        # map quality check ids to the detector PVs that those checks require
        decor = {}
        if const.QUALITYCHECK_RATE_SAT in qc:
            decor[const.QUALITYCHECK_RATE_SAT] = detector + ":" + detector_basic + ":AcquireTime"
        return decor

    logger, limits, quality_checks, feedback, report_type, consumers = init(config)
    no_frames, aggregate_limit, detector, detector_basic, detector_image = adapter.parse_config(config)
    aggregateq = Queue()

    # address the special cases of quality checks when additional arguments are required
    decor = get_decor(quality_checks)
    if len(decor) == 0:
        self.feed = Feed()
    else:
        self.feed = FeedDecorator(decor)

    # the aggregate limit is the total number of frames to be verified
    aggregate_limit = no_frames
    args = limits, aggregateq, quality_checks, aggregate_limit, consumers, feedback, detector
    ack = self.feed.feed_data(no_frames, detector, detector_basic, detector_image,
                              logger, sequence, *args)
    if ack == 1:
        bad_indexes = {}
        aggregate = aggregateq.get()

        if report_file is not None:
            report.report_results(logger, aggregate, None, report_file, report_type)
        report.add_bad_indexes(aggregate, bad_indexes)

        return bad_indexes
def verify_file_ge(logger, file, limits, quality_checks, report_type, report_dir, consumers):
    """
    This method handles verification of data in a ge file type.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, which is 'data', and a result queue. After starting the
    process the function enqueues the data slice by slice into the data queue, until all
    data is queued. The last enqueued element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    consumers : dict
        a dictionary containing consumer processes to run, and their parameters

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    type = 'data'
    fp, nframes, fsize = utils.get_data_ge(logger, file)
    # data file is corrupted, error message is logged
    if fp is None:
        return None

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    for i in range(0, nframes):
        img = np.fromfile(fp, 'uint16', fsize)
        dataq.put(Data(const.DATA_STATUS_DATA, img, type))
        time.sleep(.2)
    dataq.put(Data(const.DATA_STATUS_END))

    # receive the results
    bad_indexes = {}
    aggregate = aggregateq.get()
    report.add_bad_indexes(aggregate, bad_indexes)
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file + '.report'
        else:
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'
        report.report_results(logger, aggregate, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
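
# Example usage (a hedged sketch): the logger setup, file path, limits, and quality check
# structure below are illustrative assumptions rather than values defined by this module.
def _example_verify_file_ge():
    import logging
    logger = logging.getLogger(__name__)
    limits = {'mean': {'low_limit': 100, 'high_limit': 5000}}   # hypothetical limit values
    quality_checks = {'data': ['mean']}                         # hypothetical check structure
    consumers = {}                                              # no extra consumer processes
    return verify_file_ge(logger, '/local/user/scan_0001.ge4', limits, quality_checks,
                          const.REPORT_NONE, None, consumers)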
def verify_file_hdf(logger, file, data_tags, limits, quality_checks, report_type, report_dir, consumers):
    """
    This method handles verification of data in an hdf type file.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, and a result queue. The data type can be 'data_dark',
    'data_white' or 'data'. After starting the process the function enqueues the data
    slice by slice into the data queue, until all data is queued. The last enqueued
    element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    data_tags : dict
        a dictionary of data_type/hdf tag

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    consumers : dict
        a dictionary containing consumer processes to run, and their parameters

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    def process_data(data_type):
        data_tag = data_tags[data_type]
        dt = fp[data_tag]
        for i in range(0, dt.shape[0]):
            data = Data(const.DATA_STATUS_DATA, dt[i], data_type)
            dataq.put(data)
            # add delay to slow down the flow up, so the flow down (results)
            # is handled in synch
            time.sleep(.1)

    fp, tags = utils.get_data_hdf(file)

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    # assume a fixed order of data types; this will determine indexes on the data
    if 'data_dark' in data_tags:
        process_data('data_dark')
    if 'data_white' in data_tags:
        process_data('data_white')
    if 'data' in data_tags:
        process_data('data')
    dataq.put(Data(const.DATA_STATUS_END))

    report_file = None
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file.rsplit(".")[0] + '.report'
        else:
            file = file.rsplit(".")[0]
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'

    # receive the results
    bad_indexes = {}
    aggregate = aggregateq.get()

    if report_file is not None:
        report.report_results(logger, aggregate, None, report_file, report_type)
    report.add_bad_indexes(aggregate, bad_indexes)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
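
# Example usage (a hedged sketch): the hdf tag mapping follows the '/exchange/<data type>'
# convention used elsewhere in this code, while the file path, limits, and quality check
# structure are illustrative assumptions.
def _example_verify_file_hdf():
    import logging
    logger = logging.getLogger(__name__)
    data_tags = {'data_dark': '/exchange/data_dark',
                 'data_white': '/exchange/data_white',
                 'data': '/exchange/data'}
    limits = {}            # hypothetical: per-check limit values parsed from configuration
    quality_checks = {}    # hypothetical: quality check structure parsed from configuration
    consumers = {}         # no extra consumer processes
    return verify_file_hdf(logger, '/local/user/experiment.h5', data_tags, limits,
                           quality_checks, const.REPORT_NONE, None, consumers)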
def verify(conf, folder, data_type, num_files, report_by_files=True):
    """
    This function discovers new files and evaluates data in the files.

    This is the main function called when the verifier application starts.
    It reads the configuration for the directory to monitor, for the pattern that
    represents a file extension to look for, and for the number of files that are
    expected for the experiment. The number of files configuration parameter is added
    for experiments that generate multiple files. In some cases the experiment data is
    collected into a single file, which is organized with data sets.

    The function calls the directory function that sets up the monitoring and returns a
    notifier. After the monitoring is initialized, it starts a loop that reads the global
    "*files*" queue and then the global "*results*" queue. If there is any new file, the
    file is removed from the queue, and the data in the file is validated by a sequence
    of validation methods. If there is any new result, the result is removed from the
    queue, the corresponding process is terminated, and the result is presented.
    (Currently it is printed on the console; later it will be pushed into an EPICS
    process variable.)

    The loop is interrupted when all expected processes have produced results. The number
    of expected processes is determined by the number of files and the number of
    validation functions.

    Parameters
    ----------
    conf : str
        configuration file name, including path

    folder : str
        monitored directory

    data_type : str
        defines which data type is being evaluated

    num_files : int
        number of files that will be processed

    report_by_files : boolean
        this variable directs how to present the bad indexes in a report. If True, the
        indexes are related to the files, and a filename is included in the report.
        Otherwise, the report contains a list of bad indexes.

    Returns
    -------
    bad_indexes : dict
        a dictionary or list containing bad indexes
    """
    logger, limits, quality_checks, extensions, report_type, consumers = init(conf)
    if not os.path.isdir(folder):
        logger.error('parameter error: directory ' + folder + ' does not exist')
        sys.exit(-1)
    notifier = directory(folder, extensions)

    interrupted = False
    file_list = []
    offset_list = []
    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=datahandler.handle_data,
                args=(dataq, limits, aggregateq, quality_checks, consumers))
    p.start()

    file_index = 0
    slice_index = 0
    while not interrupted:
        # the notifier will put a new file into the newFiles queue if one was detected
        notifier.process_events()
        if notifier.check_events():
            notifier.read_events()

        # check the newFiles queue for new entries and start verification
        # processing for each new file
        while not files.empty():
            file = files.get()
            if file.find('INTERRUPT') >= 0:
                # the calling function may use an 'INTERRUPT' command to stop
                # the monitoring and processing
                dataq.put(Data(const.DATA_STATUS_END))
                notifier.stop()
                interrupted = True
                break
            else:
                if file_index == 0:
                    report_file = file.rsplit(".")[0] + '.report'
                fp, tags = utils.get_data_hdf(file)
                data_tag = tags['/exchange/' + data_type]
                data = np.asarray(fp[data_tag])
                slice_index += data.shape[0]
                file_list.append(file)
                offset_list.append(slice_index)
                for i in range(0, data.shape[0]):
                    dataq.put(Data(const.DATA_STATUS_DATA, data[i], data_type))
                file_index += 1
                if file_index == num_files:
                    dataq.put(Data(const.DATA_STATUS_END))
                    notifier.stop()
                    interrupted = True
                    break

    aggregate = aggregateq.get()
    #report.report_results(logger, aggregate, data_type, None, report_file, report_type)

    bad_indexes = {}
    if report_by_files:
        report.add_bad_indexes_per_file(aggregate, bad_indexes, file_list, offset_list)
    else:
        report.add_bad_indexes(aggregate, bad_indexes)
    try:
        report_file = open(report_file, 'w')
        report.report_bad_indexes(bad_indexes, report_file)
    except Exception:
        logger.warning('Cannot open report file')

    return bad_indexes
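
# Example usage (a hedged sketch): the configuration file, monitored directory, and
# expected file count below are illustrative assumptions.
def _example_monitor_folder():
    # watch a hypothetical detector output directory for four hdf files holding 'data'
    # frames and collect the bad indexes reported per file
    return verify('/local/user/dqconfig.ini', '/local/user/detector_out', 'data', 4,
                  report_by_files=True)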
def verify_file_ge(logger, file, limits, quality_checks, report_type, report_dir):
    """
    This method handles verification of a ge file type.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, which is 'data', and a result queue. After starting the
    process the function enqueues the data slice by slice into the data queue, until all
    data is queued. The last enqueued element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    type = 'data'
    fp, nframes, fsize = utils.get_data_ge(logger, file)
    # data file is corrupted, error message is logged
    if fp is None:
        return None

    dataq = Queue()
    aggregateq = Queue()
    p = Process(target=handler.handle_data,
                args=(dataq, limits['data'], aggregateq, quality_checks))
    p.start()

    for i in range(0, nframes):
        img = np.fromfile(fp, 'uint16', fsize)
        dataq.put(Data(img))
    dataq.put('all_data')

    bad_indexes = {}
    aggregate = aggregateq.get()
    report.add_bad_indexes(aggregate, type, bad_indexes)
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file + '.report'
        else:
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'
        report.report_results(logger, aggregate, type, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
def verify_file_hdf(logger, file, data_tags, limits, quality_checks, report_type, report_dir):
    """
    This method handles verification of an hdf type file.

    This method creates and starts a new handler process. The handler is initialized with
    the data queue, the data type, and a result queue. The data type can be 'data_dark',
    'data_white' or 'data'. After starting the process the function enqueues the data
    slice by slice into the data queue, until all data is queued. The last enqueued
    element is the end-of-data marker.

    Parameters
    ----------
    logger : Logger
        Logger instance.

    file : str
        a filename including path that will be verified

    data_tags : dict
        a dictionary of data_type/hdf tag

    limits : dict
        a dictionary of limits values

    quality_checks : dict
        a dictionary specifying the quality checks structure that will be applied to
        verify the data file

    report_type : int
        report type, currently supporting 'none', 'errors', and 'full'

    report_dir : str
        a directory where report files will be located

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    fp, tags = utils.get_data_hdf(file)

    queues = {}
    bad_indexes = {}
    # start a verification process for each data type present in the file
    for type in data_tags.keys():
        data_tag = data_tags[type]
        if data_tag in tags:
            queue = Queue()
            queues[type] = queue
            process_data(type, queue, fp, data_tag, limits, quality_checks)

    report_file = None
    if report_type != const.REPORT_NONE:
        if report_dir is None:
            report_file = file.rsplit(".")[0] + '.report'
        else:
            file = file.rsplit(".")[0]
            file_path = file.rsplit("/")
            report_file = report_dir + "/" + file_path[len(file_path) - 1] + '.report'

    # receive the results
    for type in queues.keys():
        queue = queues[type]
        aggregate = queue.get()
        report.add_bad_indexes(aggregate, type, bad_indexes)
        if report_file is not None:
            report.report_results(logger, aggregate, type, file, report_file, report_type)

    logger.info('data verifier evaluated ' + file + ' file')
    return bad_indexes
def verify(self, config, report_file=None, sequence=None):
    """
    This function starts the real-time verification process according to the given
    configuration.

    This function reads the configuration and initiates variables accordingly.
    It creates a Feed instance, starts the data feed, and waits to receive results in a
    report queue. The results are then written into a report file.

    Parameters
    ----------
    config : str
        configuration file name, including path

    report_file : file
        a file where the report will be written, defaulted to None, if no report wanted

    sequence : list or int
        information about data sequence or number of frames

    Returns
    -------
    bad_indexes : dict
        a dictionary of bad indexes per data type
    """
    feed_args, feed_kwargs, feedback, decor_map, logger, report_type = self.init(config)

    # init the pv feedback
    if feedback is not None:
        feedbackq = Queue()
        feedback_pvs = utils.get_feedback_pvs(feed_args[1])
        fb_args = {'feedback_pvs': feedback_pvs, 'detector': feed_kwargs['detector']}

        feedback_obj = fb.Feedback(feedbackq, feedback, **fb_args)
        # pass the logger to the feedback object
        if const.FEEDBACK_LOG in feedback:
            feedback_obj.set_logger(logger)

        feed_kwargs['feedbackq'] = feedbackq
        self.p = Process(target=feedback_obj.deliver, args=())
        self.p.start()

    reportq = Queue()

    # address the special cases of quality checks when additional arguments are required
    if decor_map is None:
        self.feed = Feed()
    else:
        self.feed = FeedDecorator(decor_map)

    ack = self.feed.feed_data(logger, reportq, *feed_args, **feed_kwargs)
    if ack == 1:
        bad_indexes = {}
        aggregate = reportq.get()

        if report_file is not None:
            report.report_results(logger, aggregate, None, report_file, report_type)
        report.add_bad_indexes(aggregate, bad_indexes)

        return bad_indexes
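
# Example usage (a hedged sketch): verify() above is an instance method, so it is called
# on an instance of the enclosing real-time verifier class; the instance, configuration
# path, frame count, and report destination below are hypothetical.
def _example_real_time_verify(verifier):
    # 'verifier' is assumed to be an instance of the enclosing class; the report
    # destination is assumed to accept an open file object, per the docstring
    with open('/local/user/realtime.report', 'w') as report_file:
        return verifier.verify('/local/user/dqconfig.ini', report_file=report_file,
                               sequence=100)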